diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -798,6 +798,7 @@ virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads); /// Emits a critical region. @@ -1727,6 +1728,7 @@ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads) override; /// Emits a critical region. diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1263,7 +1263,7 @@ CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); + return CGF.GenerateOpenMPCapturedStmtFunctionAggregate(*CS, D.getBeginLoc()); } llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( @@ -2036,11 +2036,11 @@ CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } -void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); @@ -12388,12 +12388,11 @@ llvm_unreachable("Not supported in SIMD-only mode"); } -void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, - SourceLocation 
Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPSIMDRuntime::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -256,6 +256,7 @@ void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, llvm::Value *NumThreads) override; /// Emit an implicit/explicit barrier for OpenMP threads. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1210,18 +1210,17 @@ emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } -void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF, - SourceLocation Loc, - llvm::Function *OutlinedFn, - ArrayRef CapturedVars, - const Expr *IfCond, - llvm::Value *NumThreads) { +void CGOpenMPRuntimeGPU::emitParallelCall( + CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, + ArrayRef CapturedVars, + ArrayRef CapturedVarsElemTypes, const Expr *IfCond, + llvm::Value *NumThreads) { if (!CGF.HaveInsertPoint()) return; - auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, - NumThreads](CodeGenFunction &CGF, - PrePostActionTy &Action) { + auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, + CapturedVarsElemTypes, IfCond, NumThreads]( + CodeGenFunction &CGF, PrePostActionTy &Action) { CGBuilderTy &Bld = CGF.Builder; llvm::Value *NumThreadsVal = NumThreads; llvm::Function *WFn = 
WrapperFunctionsMap[OutlinedFn]; @@ -1235,25 +1234,49 @@ // TODO: Is that needed? CodeGenFunction::OMPPrivateScope PrivateArgScope(CGF); - Address CapturedVarsAddrs = CGF.CreateDefaultAlignTempAlloca( - llvm::ArrayType::get(CGM.VoidPtrTy, CapturedVars.size()), - "captured_vars_addrs"); - // There's something to share. - if (!CapturedVars.empty()) { - // Prepare for parallel region. Indicate the outlined function. - ASTContext &Ctx = CGF.getContext(); - unsigned Idx = 0; - for (llvm::Value *V : CapturedVars) { - Address Dst = Bld.CreateConstArrayGEP(CapturedVarsAddrs, Idx); - llvm::Value *PtrV; - if (V->getType()->isIntegerTy()) - PtrV = Bld.CreateIntToPtr(V, CGF.VoidPtrTy); - else - PtrV = Bld.CreatePointerBitCastOrAddrSpaceCast(V, CGF.VoidPtrTy); - CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false, - Ctx.getPointerType(Ctx.VoidPtrTy)); - ++Idx; - } + assert(CapturedVars.size() == 1 && + "Expected single aggregate argument to outlined function"); + + // Globalize the single aggregate argument, if needed, or use a local + // alloca, or emit null when there are no arguments. 
+ llvm::Value *AggregateV = CapturedVars[0]; + assert(AggregateV->getType()->isPointerTy() && + "Expected pointer type for aggregate argument."); + + assert(CapturedVarsElemTypes.size() == 1 && + "Expected single element in array of types."); + llvm::Type *PtrElemTy = CapturedVarsElemTypes[0]; + auto &DL = CGM.getDataLayout(); + unsigned AllocSize = DL.getTypeAllocSize(PtrElemTy); + + llvm::CallBase *GlobalPtr = nullptr; + llvm::Value *AggregatePtr = nullptr; + + if (AllocSize) { + llvm::AllocaInst *LocalAlloc = + CGF.CreateTempAlloca(PtrElemTy, ".tmp.outlined.agg.arg"); + llvm::Value *LocalPtr = Bld.CreatePointerCast(LocalAlloc, CGF.VoidPtrTy); + GlobalPtr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_shared), + {llvm::ConstantInt::get(CGM.SizeTy, AllocSize)}); + GlobalPtr->addRetAttr(llvm::Attribute::get( + CGM.getLLVMContext(), llvm::Attribute::Alignment, + CGM.getContext().getTargetInfo().getNewAlign() / 8)); + + llvm::Value *AllocArgs[] = {LocalPtr, GlobalPtr}; + AggregatePtr = CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_aggregate_arg), + AllocArgs); + + llvm::Value *CapturedVarVal = Bld.CreateAlignedLoad( + PtrElemTy, AggregateV, DL.getABITypeAlign(PtrElemTy)); + llvm::Value *AggregatePtrCast = Bld.CreatePointerBitCastOrAddrSpaceCast( + AggregatePtr, PtrElemTy->getPointerTo()); + Bld.CreateDefaultAlignedStore(CapturedVarVal, AggregatePtrCast); + } else { + AggregatePtr = llvm::Constant::getNullValue(OMPBuilder.VoidPtr); } llvm::Value *IfCondVal = nullptr; @@ -1269,21 +1292,29 @@ NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty), assert(IfCondVal && "Expected a value"); + assert(AggregatePtr && "Expected non-null aggregate pointer value"); + // Create the parallel call. 
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *Args[] = { - RTLoc, - getThreadID(CGF, Loc), - IfCondVal, - NumThreadsVal, - llvm::ConstantInt::get(CGF.Int32Ty, -1), - FnPtr, - ID, - Bld.CreateBitOrPointerCast(CapturedVarsAddrs.getPointer(), - CGF.VoidPtrPtrTy), - llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())}; + llvm::Value *Args[] = {RTLoc, + getThreadID(CGF, Loc), + IfCondVal, + NumThreadsVal, + llvm::ConstantInt::get(CGF.Int32Ty, -1), + FnPtr, + ID, + AggregatePtr}; CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_parallel_51), Args); + + if (AllocSize) { + assert(GlobalPtr && "Expected non-null global pointer value"); + // Pop global memory used for argument allocation. + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), + OMPRTL___kmpc_free_shared), + {GlobalPtr, llvm::ConstantInt::get(CGM.SizeTy, AllocSize)}); + } }; RegionCodeGenTy RCG(ParallelGen); @@ -3221,7 +3252,6 @@ D.getBeginLoc(), D.getBeginLoc()); const auto *RD = CS.getCapturedRecordDecl(); - auto CurField = RD->field_begin(); Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); @@ -3233,70 +3263,44 @@ Args.emplace_back(ZeroAddr.getPointer()); CGBuilderTy &Bld = CGF.Builder; - auto CI = CS.capture_begin(); // Use global memory for data sharing. // Handle passing of global args to workers. 
Address GlobalArgs = - CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy, "global_args"); + CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrTy, "global_args"); llvm::Value *GlobalArgsPtr = GlobalArgs.getPointer(); llvm::Value *DataSharingArgs[] = {GlobalArgsPtr}; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_get_shared_variables), - DataSharingArgs); + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_get_shared_variables_aggregate), + DataSharingArgs); // Retrieve the shared variables from the list of references returned // by the runtime. Pass the variables to the outlined function. - Address SharedArgListAddress = Address::invalid(); - if (CS.capture_size() > 0 || - isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - SharedArgListAddress = CGF.EmitLoadOfPointer( + Address SharedArgAggregateAddress = Address::invalid(); + if (CS.capture_size() > 0) { + SharedArgAggregateAddress = CGF.EmitLoadOfPointer( GlobalArgs, CGF.getContext() - .getPointerType(CGF.getContext().VoidPtrTy) + .getPointerType(CGF.getContext().VoidTy) .castAs()); - } - unsigned Idx = 0; - if (isOpenMPLoopBoundSharingDirective(D.getDirectiveKind())) { - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); + // Load the outlined arg aggregate struct. 
+ ASTContext &CGFContext = CGF.getContext(); + QualType RecordPointerTy = + CGFContext.getPointerType(CGFContext.getRecordType(RD)); Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy); - llvm::Value *LB = CGF.EmitLoadOfScalar( - TypedAddress, - /*Volatile=*/false, - CGF.getContext().getPointerType(CGF.getContext().getSizeType()), - cast(D).getLowerBoundVariable()->getExprLoc()); - Args.emplace_back(LB); - ++Idx; - Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, Idx); - TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.SizeTy->getPointerTo(), CGF.SizeTy); - llvm::Value *UB = CGF.EmitLoadOfScalar( - TypedAddress, - /*Volatile=*/false, - CGF.getContext().getPointerType(CGF.getContext().getSizeType()), - cast(D).getUpperBoundVariable()->getExprLoc()); - Args.emplace_back(UB); - ++Idx; - } - if (CS.capture_size() > 0) { + SharedArgAggregateAddress, + CGF.ConvertTypeForMem(RecordPointerTy)->getPointerTo(), + CGF.ConvertTypeForMem(RecordPointerTy)); + llvm::Value *Arg = TypedAddress.getPointer(); + Args.emplace_back(Arg); + } else { + // If there are no captured arguments, use nullptr. 
ASTContext &CGFContext = CGF.getContext(); - for (unsigned I = 0, E = CS.capture_size(); I < E; ++I, ++CI, ++CurField) { - QualType ElemTy = CurField->getType(); - Address Src = Bld.CreateConstInBoundsGEP(SharedArgListAddress, I + Idx); - Address TypedAddress = Bld.CreatePointerBitCastOrAddrSpaceCast( - Src, CGF.ConvertTypeForMem(CGFContext.getPointerType(ElemTy)), - CGF.ConvertTypeForMem(ElemTy)); - llvm::Value *Arg = CGF.EmitLoadOfScalar(TypedAddress, - /*Volatile=*/false, - CGFContext.getPointerType(ElemTy), - CI->getLocation()); - if (CI->capturesVariableByCopy() && - !CI->getCapturedVar()->getType()->isAnyPointerType()) { - Arg = castValueToType(CGF, Arg, ElemTy, CGFContext.getUIntPtrType(), - CI->getLocation()); - } - Args.emplace_back(Arg); - } + QualType RecordPointerTy = + CGFContext.getPointerType(CGFContext.getRecordType(RD)); + llvm::Value *Arg = + llvm::Constant::getNullValue(CGF.ConvertTypeForMem(RecordPointerTy)); + Args.emplace_back(Arg); } emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedParallelFn, Args); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -90,6 +90,19 @@ auto *VD = C.getCapturedVar(); assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); + // Skip implicit captures for combined distribute loop bounds, + // those will be handled by later codegen. 
+ if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + const auto *LoopDirective = cast(&S); + VarDecl *PrevLB = cast( + cast(LoopDirective->getPrevLowerBoundVariable()) + ->getDecl()); + VarDecl *PrevUB = cast( + cast(LoopDirective->getPrevUpperBoundVariable()) + ->getDecl()); + if (VD == PrevLB || VD == PrevUB) + continue; + } DeclRefExpr DRE( CGF.getContext(), const_cast(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -265,6 +278,19 @@ continue; assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); + // Skip implicit captures for combined distribute loop bounds, + // those will be handled by later codegen. + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + const auto *LoopDirective = cast(&S); + VarDecl *PrevLB = cast( + cast(LoopDirective->getPrevLowerBoundVariable()) + ->getDecl()); + VarDecl *PrevUB = cast( + cast(LoopDirective->getPrevUpperBoundVariable()) + ->getDecl()); + if (VD == PrevLB || VD == PrevUB) + continue; + } DeclRefExpr DRE(CGF.getContext(), const_cast(VD), isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && @@ -322,6 +348,34 @@ return CGM.getSize(SizeInChars); } +void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate( + const CapturedStmt &S, SmallVectorImpl &CapturedVars, + SmallVectorImpl &CapturedVarsElemTypes) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + QualType RecordTy = getContext().getRecordType(RD); + // Create the aggregate argument struct for the outlined function. + LValue AggLV = MakeAddrLValue( + CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy); + + // Initialize the aggregate with captured values. + auto CurField = RD->field_begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField) { + LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField); + // Initialize for VLA. 
+ if (CurField->hasCapturedVLAType()) { + EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); + } else + // Initialize for capturesThis, capturesVariableByCopy, + // capturesVariable + EmitInitializerForField(*CurField, LV, *I); + } + + CapturedVars.push_back(AggLV.getPointer(*this)); + CapturedVarsElemTypes.push_back(ConvertTypeForMem(AggLV.getType())); +} + void CodeGenFunction::GenerateOpenMPCapturedVars( const CapturedStmt &S, SmallVectorImpl &CapturedVars) { const RecordDecl *RD = S.getCapturedRecordDecl(); @@ -421,6 +475,103 @@ }; } // namespace +static llvm::Function *emitOutlinedFunctionPrologueAggregate( + CodeGenFunction &CGF, FunctionArgList &Args, + llvm::MapVector> + &LocalAddrs, + llvm::DenseMap> + &VLASizes, + llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc, + StringRef FunctionName) { + const CapturedDecl *CD = CS.getCapturedDecl(); + const RecordDecl *RD = CS.getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + CXXThisValue = nullptr; + // Build the argument list. + CodeGenModule &CGM = CGF.CGM; + ASTContext &Ctx = CGM.getContext(); + Args.append(CD->param_begin(), CD->param_end()); + + // Create the function declaration. + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + auto *F = + llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + FunctionName, &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->setDoesNotThrow(); + F->setDoesNotRecurse(); + + // Generate the function. 
+ CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc); + Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam()); + llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr); + LValue ContextLV = CGF.MakeNaturalAlignAddrLValue( + ContextV, CGM.getContext().getTagDeclType(RD)); + const auto *I = CS.captures().begin(); + for (const FieldDecl *FD : RD->fields()) { + LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD); + // Do not map arguments if we emit function with non-original types. + Address LocalAddr = FieldLV.getAddress(CGF); + // If we are capturing a pointer by copy we don't need to do anything, just + // use the value that we get from the arguments. + if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { + const VarDecl *CurVD = I->getCapturedVar(); + LocalAddrs.insert({FD, {CurVD, LocalAddr}}); + ++I; + continue; + } + + LValue ArgLVal = + CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + const VariableArrayType *VAT = FD->getCapturedVLAType(); + VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg); + } else if (I->capturesVariable()) { + const VarDecl *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(CGF); + if (ArgLVal.getType()->isLValueReferenceType()) { + ArgAddr = CGF.EmitLoadOfReference(ArgLVal); + } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { + assert(ArgLVal.getType()->isPointerType()); + ArgAddr = CGF.EmitLoadOfPointer( + ArgAddr, ArgLVal.getType()->castAs()); + } + LocalAddrs.insert( + {FD, + {Var, Address(ArgAddr.getPointer(), ArgAddr.getElementType(), + Ctx.getDeclAlign(Var))}}); + } else if (I->capturesVariableByCopy()) { + assert(!FD->getType()->isAnyPointerType() && + "Not expecting a captured pointer."); + const VarDecl *Var = I->getCapturedVar(); + Address CopyAddr = 
CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD), + Var->getName()); + LValue CopyLVal = + CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl); + + RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation()); + CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal); + + LocalAddrs.insert({FD, {Var, CopyAddr}}); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); + LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}}); + } + ++I; + } + + return F; +} + static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, llvm::MapVector> @@ -605,6 +756,38 @@ return F; } +llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( + const CapturedStmt &S, SourceLocation Loc) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + // Build the argument list. 
+ FunctionArgList Args; + llvm::MapVector> LocalAddrs; + llvm::DenseMap> VLASizes; + StringRef FunctionName = CapturedStmtInfo->getHelperName(); + llvm::Function *F = emitOutlinedFunctionPrologueAggregate( + *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName); + CodeGenFunction::OMPPrivateScope LocalScope(*this); + for (const auto &LocalAddrPair : LocalAddrs) { + if (LocalAddrPair.second.first) { + auto retAddrPair = [&LocalAddrPair]() { + return LocalAddrPair.second.second; + }; + LocalScope.addPrivate(LocalAddrPair.second.first, retAddrPair()); + } + } + (void)LocalScope.Privatize(); + for (const auto &VLASizePair : VLASizes) + VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; + PGO.assignRegionCounters(GlobalDecl(CD), F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + (void)LocalScope.ForceCleanup(); + FinishFunction(CD->getBodyRBrace()); + return F; +} + llvm::Function * CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc) { @@ -1474,9 +1657,8 @@ /// Codegen lambda for appending distribute lower and upper bounds to outlined /// parallel function. This is necessary for combined constructs such as /// 'distribute parallel for' -typedef llvm::function_ref &)> +typedef llvm::function_ref CodeGenBoundParametersTy; } // anonymous namespace @@ -1569,14 +1751,18 @@ OMPParallelScope Scope(CGF, S); llvm::SmallVector CapturedVars; + llvm::SmallVector CapturedVarsElemTypes; + // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk // lower and upper bounds with the pragma 'for' chunking mechanism. 
// The following lambda takes care of appending the lower and upper bound // parameters when necessary - CodeGenBoundParameters(CGF, S, CapturedVars); - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, - CapturedVars, IfCond, NumThreads); + CodeGenBoundParameters(CGF, S, *CS); + CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars, + CapturedVarsElemTypes); + CGF.CGM.getOpenMPRuntime().emitParallelCall( + CGF, S.getBeginLoc(), OutlinedFn, CapturedVars, CapturedVarsElemTypes, + IfCond, NumThreads); } static bool isAllocatableDecl(const VarDecl *VD) { @@ -1592,7 +1778,7 @@ static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, - llvm::SmallVectorImpl &) {} + const CapturedStmt &) {} static void emitOMPCopyinClause(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -3147,21 +3333,30 @@ static void emitDistributeParallelForDistributeInnerBoundParams( CodeGenFunction &CGF, const OMPExecutableDirective &S, - llvm::SmallVectorImpl &CapturedVars) { + const CapturedStmt &CS) { const auto &Dir = cast(S); + + // The first captured variable of the captured statement corresponds + // to inner lower bound. + VarDecl *PrevLBCapDecl = CS.captures().begin()->getCapturedVar(); + // The second captured variable corresponds to the inner upper bound. 
+ VarDecl *PrevUBCapDecl = std::next(CS.captures().begin())->getCapturedVar(); + LValue LB = CGF.EmitLValue(cast(Dir.getCombinedLowerBoundVariable())); llvm::Value *LBCast = CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(LB.getAddress(CGF)), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(LBCast); + CGF.EmitStoreOfScalar(LBCast, CGF.GetAddrOfLocalVar(PrevLBCapDecl), + /*Volatile=*/false, PrevLBCapDecl->getType()); + LValue UB = CGF.EmitLValue(cast(Dir.getCombinedUpperBoundVariable())); - llvm::Value *UBCast = CGF.Builder.CreateIntCast(CGF.Builder.CreateLoad(UB.getAddress(CGF)), CGF.SizeTy, /*isSigned=*/false); - CapturedVars.push_back(UBCast); + CGF.EmitStoreOfScalar(UBCast, CGF.GetAddrOfLocalVar(PrevUBCapDecl), + /*Volatile=*/false, PrevUBCapDecl->getType()); } static void @@ -5613,6 +5808,14 @@ LValue IL = EmitOMPHelperVar(*this, cast(S.getIsLastIterVariable())); + // Emit previous upper, lower bound captured variables, if applicable. + if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) { + EmitOMPHelperVar(*this, + cast(S.getPrevLowerBoundVariable())); + EmitOMPHelperVar(*this, + cast(S.getPrevUpperBoundVariable())); + } + OMPPrivateScope LoopScope(*this); if (EmitOMPFirstprivateClause(S, LoopScope)) { // Emit implicit barrier to synchronize threads and avoid data races @@ -6749,7 +6952,10 @@ OMPTeamsScope Scope(CGF, S); llvm::SmallVector CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); + llvm::SmallVector CapturedVarsElemTypes; + + CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars, + CapturedVarsElemTypes); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, CapturedVars); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3342,8 +3342,14 @@ llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); llvm::Function 
*GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); + llvm::Function * + GenerateOpenMPCapturedStmtFunctionAggregate(const CapturedStmt &S, + SourceLocation Loc); llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, SourceLocation Loc); + void GenerateOpenMPCapturedVarsAggregate( + const CapturedStmt &S, SmallVectorImpl &CapturedVars, + SmallVectorImpl &CapturedVarsElemTypes); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl &CapturedVars); void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -4387,12 +4387,22 @@ Sema::CapturedParamNameType Params[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } case OMPD_target_teams_distribute_parallel_for: @@ -4443,14 +4453,24 @@ Sema::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), 
QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ParamsParallel, /*OpenMPCaptureLevel=*/3); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } @@ -4488,14 +4508,24 @@ Sema::CapturedParamNameType ParamsParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), - std::make_pair(".previous.lb.", Context.getSizeType().withConst()), - std::make_pair(".previous.ub.", Context.getSizeType().withConst()), std::make_pair(StringRef(), QualType()) // __context with shared vars }; + + VarDecl *PrevLBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.lb"); + VarDecl *PrevUBDecl = + buildVarDecl(*this, SourceLocation(), Context.getSizeType().withConst(), + ".captured.omp.previous.ub"); + // Start a captured region for 'teams' or 'parallel'. Both regions have // the same implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, ParamsParallel, /*OpenMPCaptureLevel=*/1); + + MarkVariableReferenced(SourceLocation(), PrevLBDecl); + MarkVariableReferenced(SourceLocation(), PrevUBDecl); + break; } case OMPD_target_update: @@ -9752,23 +9782,23 @@ CombEUB = SemaRef.ActOnFinishFullExpr(CombEUB.get(), /*DiscardedValue*/ false); - const CapturedDecl *CD = cast(AStmt)->getCapturedDecl(); - // We expect to have at least 2 more parameters than the 'parallel' - // directive does - the lower and upper bounds of the previous schedule. 
- assert(CD->getNumParams() >= 4 && - "Unexpected number of parameters in loop combined directive"); - - // Set the proper type for the bounds given what we learned from the - // enclosed loops. - ImplicitParamDecl *PrevLBDecl = CD->getParam(/*PrevLB=*/2); - ImplicitParamDecl *PrevUBDecl = CD->getParam(/*PrevUB=*/3); - - // Previous lower and upper bounds are obtained from the region - // parameters. - PrevLB = - buildDeclRefExpr(SemaRef, PrevLBDecl, PrevLBDecl->getType(), InitLoc); - PrevUB = - buildDeclRefExpr(SemaRef, PrevUBDecl, PrevUBDecl->getType(), InitLoc); + const CapturedStmt *CS = cast(AStmt); + + // Refer to captured variables from the associated captured statement of + // the combined directive. First captured variable is the previous lower + // bound, second is the previous upper bound. + VarDecl *PrevLBCapDecl = CS->captures().begin()->getCapturedVar(); + assert(PrevLBCapDecl && + "Expected captured var for previous LB in combined directive"); + VarDecl *PrevUBCapDecl = + std::next(CS->captures().begin())->getCapturedVar(); + assert(PrevUBCapDecl && + "Expected captured var for previous UB in combined directive"); + + PrevLB = buildDeclRefExpr(SemaRef, PrevLBCapDecl, + PrevLBCapDecl->getType(), InitLoc); + PrevUB = buildDeclRefExpr(SemaRef, PrevUBCapDecl, + PrevUBCapDecl->getType(), InitLoc); } } diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c +++ b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c @@ -35,231 +35,230 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | 
`-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_5:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_8:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_9:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_10:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_11:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_12:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_13:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_14:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_15:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_17:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_18:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_19:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_20:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_8]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_9]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_22:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_24:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_30:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_31:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_32:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_33:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_35:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 
'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_37:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_38:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_39:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_41:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_42:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_45:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_46:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_47:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr 
[[ADDR_54:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_55:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_35]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_46]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_47]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_60:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_61:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_62:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_68:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_69:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_70:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_72:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: 
| |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_76:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_77:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_79:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_82:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_83:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_84:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_88:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_89:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// 
CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_90:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_91:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_93:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_94:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_95:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_96:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_97:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_98:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_99:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_77]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_78]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_88]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_89]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_101:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_102:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_107:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_108:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_109:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective [[ADDR_111:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_113:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_114:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_116:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_117:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_119:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral 
[[ADDR_120:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_121:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_122:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_124:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_125:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_126:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_127:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_129:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_130:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_131:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// 
CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator 
[[ADDR_132:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_133:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_138:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_139:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_141:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_142:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_119]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_120]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_130]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_131]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_143:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_144:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_145:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_146:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_149:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] 
col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_151:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_152:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPDistributeParallelForSimdDirective [[ADDR_154:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_155:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_156:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: `-CapturedStmt [[ADDR_158:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_159:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_160:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_162:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_163:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_164:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_165:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_166:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_168:0x[a-z0-9]*]] 'int' {{.*}}ParmVar 
[[ADDR_150]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_169:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_171:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_172:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_173:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_174:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_176:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_178:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_179:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_180:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_181:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_182:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_184:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral 
[[ADDR_185:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_186:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_187:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_188:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr 
[[ADDR_189:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_190:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_191:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_192:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_193:0x[a-z0-9]*]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_194:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_195:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_196:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_162]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_163]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_173]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_174]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_184]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_185]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_198:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_200:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_202:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_203:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c +++ 
b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c @@ -35,231 +35,230 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_4:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_5:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_7:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_8:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_9:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 
implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_10:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_11:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_12:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_13:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_14:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_15:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_16:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_8]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_17:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl 
[[ADDR_18:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_19:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_20:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_8]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_9]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_22:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_24:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_26:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_27:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_28:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_29:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_30:0x[a-z0-9]*]] +// CHECK-NEXT: | `-CapturedStmt [[ADDR_31:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_32:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_33:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_34:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_35:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_36:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_37:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_38:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_39:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_40:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_41:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_42:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_35]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_44:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_45:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_46:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_47:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_48:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_49:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_51:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_52:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: | | | 
|-UnaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_54:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_46]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_55:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_35]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_36]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_46]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_47]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_59:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_60:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_61:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_62:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_63:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_27]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_64:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_28]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_65:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_66:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_67:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_68:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_69:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_70:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_71:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral 
[[ADDR_72:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_76:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_77:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_78:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_79:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_80:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_82:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_83:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_84:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_77]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_86:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_88:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_89:0x[a-z0-9]*]] 
'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// 
CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_90:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_91:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_92:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_93:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_94:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_95:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_96:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_88]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_97:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_98:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_99:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_77]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_78]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_88]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_89]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_101:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_102:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_66]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_106:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_67]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_107:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_108:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_109:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_110:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPDistributeParallelForDirective [[ADDR_111:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_112:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_113:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_114:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | `-CapturedStmt [[ADDR_115:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_116:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_117:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_118:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_119:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_120:0x[a-z0-9]*]] 'int' 0 
// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_121:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_122:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_124:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_125:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_126:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_127:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_119]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_128:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_129:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_130:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_131:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 
'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_132:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | 
|-ImplicitCastExpr [[ADDR_133:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_135:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_136:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_137:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_138:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_130]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_139:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_141:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_142:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_119]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_120]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_130]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_131]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_143:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_144:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_145:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_146:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_108]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_148:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_109]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_149:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl 
[[ADDR_151:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_152:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPDistributeParallelForDirective [[ADDR_154:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_155:0x[a-z0-9]*]] +// CHECK-NEXT: | `-ConstantExpr [[ADDR_156:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_157:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: `-CapturedStmt [[ADDR_158:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_159:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_160:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_162:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_163:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_164:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_165:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_166:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_167:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_168:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: | | 
|-UnaryOperator [[ADDR_169:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_162]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_171:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_172:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_173:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_174:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_176:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_178:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_179:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_180:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_181:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_173]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_182:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_184:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_185:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | 
|-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_186:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_187:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_188:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_189:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr 
[[ADDR_190:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_191:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_192:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_184]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_193:0x[a-z0-9]*]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_194:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_195:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_196:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_162]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_163]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_173]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_174]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_184]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_185]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_198:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_200:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_202:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_151]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_203:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_152]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c +++ 
b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c @@ -35,1926 +35,1965 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// 
CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_11:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_13:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_16:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_17:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_18:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_19:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_20:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_21:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_22:0x[a-z0-9]*]] 'int' 
postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_25:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_26:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_28:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_29:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_33:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_34:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_37:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_38:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_41:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_43:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' 
{{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_44:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_46:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_47:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_49:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// 
CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_51:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_52:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_53:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_54:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_55:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_59:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_60:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_61:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_10]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | 
| `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit 
.global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_44]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_45]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_46]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_47]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_48]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_49]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// 
CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 
'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' 
+// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_63:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_64:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_65:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_66:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_67:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_68:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_69:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_70:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_71:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_72:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_76:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_77:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_78:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt 
[[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_80:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_81:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_82:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_83:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_84:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_86:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_87:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_88:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_89:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl 
[[ADDR_91:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_92:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_93:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_94:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_95:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_96:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_97:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_99:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_101:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_102:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_106:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_107:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_108:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_109:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_110:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_111:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_112:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_113:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_114:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_115:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_116:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_117:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_118:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_119:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_120:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_121:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr 
[[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_126:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_127:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | 
`-FieldDecl [[ADDR_129:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_130:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt 
[[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_131:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_132:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_133:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_134:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_137:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_138:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_139:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_141:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_142:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_143:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_144:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_145:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_74]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_75]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | 
| | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator 
[[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_125]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_126]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_127]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_128]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_129]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_130]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | 
`-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_118]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_93]] 
'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_146:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_148:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_149:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_151:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_152:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_154:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_155:0x[a-z0-9]*]] 'int' 1 +// 
CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_156:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_158:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_159:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_160:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_162:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_163:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_164:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_165:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_166:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_167:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_168:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_169:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_170:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// 
CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_171:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_172:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_173:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_174:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_176:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_177:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_178:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_179:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_180:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator 
[[ADDR_181:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_182:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_183:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_184:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_185:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_186:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_188:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_189:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_190:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_191:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_192:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_193:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_194:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_195:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_196:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_197:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_198:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_199:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_200:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_201:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_202:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_203:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_204:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_205:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_206:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_207:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_208:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_209:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 
'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | 
| | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_211:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_212:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_213:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | 
|-CapturedRecordAttr [[ADDR_214:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_215:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_216:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_217:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_218:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr 
[[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | 
| | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_220:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_221:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_222:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_227:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_228:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_229:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_231:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_232:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_233:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_162]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_163]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | 
| | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator 
[[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_211]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_212]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_213]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_214]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_215]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_216]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_217]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_218]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: 
| | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// 
CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_206]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_181]] 
'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_234:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_235:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_236:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_237:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_238:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_239:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_240:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_241:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_242:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral 
[[ADDR_243:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_244:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_245:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_247:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_248:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_249:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_250:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_251:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_252:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_253:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_254:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_255:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_256:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_257:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_258:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_259:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_260:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_261:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_262:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_263:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_264:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_265:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_266:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_267:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_268:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_269:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_270:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_271:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_272:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_273:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_274:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_275:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_276:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_277:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_278:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_279:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_280:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_281:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_282:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_283:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_284:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_285:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_286:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_287:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_288:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_289:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_290:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_291:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_292:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_293:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_294:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_295:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_296:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_297:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr 
[[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_299:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_300:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_301:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_302:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_303:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_304:0x[a-z0-9]*]] <> Implicit 30 +// 
CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_305:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_306:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// 
CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | 
| | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_308:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_309:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_310:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_311:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_312:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_316:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_317:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_318:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_319:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_320:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_321:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_250]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_251]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | 
| | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator 
[[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_299]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_300]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_301]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_302]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_303]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_304]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_305]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_306]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: 
| | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// 
CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_294]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_296]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_322:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_323:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_324:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_325:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_326:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_327:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_328:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForSimdDirective [[ADDR_329:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_330:0x[a-z0-9]*]] +// 
CHECK-NEXT: | `-ConstantExpr [[ADDR_331:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_332:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_333:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_334:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_335:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_336:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_337:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_338:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_339:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_340:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_341:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_342:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_343:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_344:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_345:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_346:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_347:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_348:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt 
{{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_349:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_350:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_351:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_352:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_353:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_354:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_355:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_356:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_357:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_358:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral 
{{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_359:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_361:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_362:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_363:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_364:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_365:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_366:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_367:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_368:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_369:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_370:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_371:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_372:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_373:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_374:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_375:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_377:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_378:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_379:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_380:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_381:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_382:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_384:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_385:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_386:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_387:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_388:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_389:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_390:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_391:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_392:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_393:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_394:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_395:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_396:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_397:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_398:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_399:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_400:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral 
{{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | 
|-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_401:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_402:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_403:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_404:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_405:0x[a-z0-9]*]] 
col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_406:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_407:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_408:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_409:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_410:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_411:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_412:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 
'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' 
{{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_413:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_415:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_416:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_417:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_418:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_419:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_420:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_421:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_422:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_423:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_424:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_425:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_426:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_427:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_428:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_429:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_430:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_340]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_341]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// 
CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' 
{{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | 
| | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_401]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_402]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_403]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_404]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_405]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_406]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_407]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_408]] <> Implicit 30 +// CHECK-NEXT: | | 
|-FieldDecl [[ADDR_409]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_410]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_411]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_412]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | 
|-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_395]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_397]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral 
{{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_365]] +// 
CHECK-NEXT: | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_431:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_432:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_433:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c +++ b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c @@ -35,1926 +35,1965 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | 
| | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:7:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_11:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_12:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_13:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_14:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_16:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_17:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_18:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_19:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_20:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_21:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_22:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_24:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_25:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_26:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_28:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_29:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_33:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_34:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_37:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_38:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_41:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_43:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar 
[[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_44:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_46:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_47:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_48:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_49:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: 
| | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_50:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_51:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_52:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_53:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_54:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_55:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_56:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_57:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_58:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_59:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_60:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_61:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_10]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_44]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_45]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_46]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_47]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_48]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_49]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_11]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_12]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | 
|-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | | 
| `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_28]] <> 'const unsigned long' {{.*}}Var [[ADDR_29]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_32]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_33]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_34]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_36]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_37]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_38]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_39]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_41]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_43]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_13]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_14]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_15]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_17]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_18]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_19]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_20]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_21]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_5]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_22]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}Var [[ADDR_16]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_24]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_25]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_26]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | `-VarDecl [[ADDR_16]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_17]] 'int' 0 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_63:0x[a-z0-9]*]] line:9:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_64:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_65:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_66:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_67:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_68:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_69:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_70:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_71:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_72:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_73:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_74:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_75:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_76:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_77:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_78:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt 
[[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_80:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_81:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_82:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_83:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_84:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_85:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_86:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_87:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_88:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_89:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl 
[[ADDR_91:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_92:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_93:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_94:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_95:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_96:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_97:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_99:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_100:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_101:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_102:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_103:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_104:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_105:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_106:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_107:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_108:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_109:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_110:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_111:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_112:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_113:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_114:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_115:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_116:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_117:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_118:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_119:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_120:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_121:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 
'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_126:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_127:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl 
[[ADDR_129:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_130:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// 
CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_131:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_132:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_133:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_134:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_137:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_138:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_139:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_140:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_141:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_142:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_143:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_144:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_145:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_74]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_75]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | 
`-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' 
postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_125]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_126]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_127]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_128]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_129]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_130]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_76]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_77]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_82]] 
'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix 
'++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_103]] <> 'const unsigned long' {{.*}}Var [[ADDR_104]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_105]] <> 'const unsigned long' {{.*}}Var [[ADDR_106]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_107]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_108]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_109]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_110]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_111]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_112]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_113]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_114]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_115]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_116]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_117]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_118]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_119]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_120]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_121]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_78]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_79]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_80]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_83]] 'int' '<' +// CHECK-NEXT: | | 
| | |-ImplicitCastExpr [[ADDR_84]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_85]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_86]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_69]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_87]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_88]] 'int' {{.*}}Var [[ADDR_81]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_89]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_90]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_92]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_93]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_94]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_95]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_96]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_70]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_97]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_91]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_99]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_100]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_101]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_102]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_81]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_82]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_91]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_92]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_146:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_64]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_65]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_148:0x[a-z0-9]*]] line:16:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_149:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_150:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_151:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_152:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_153:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_154:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_155:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | 
|-OMPFirstprivateClause [[ADDR_156:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_157:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_158:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_159:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_160:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_161:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_162:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_163:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_164:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_165:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_166:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_167:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_168:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_169:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_170:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | 
| | | | | |-BinaryOperator [[ADDR_171:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_172:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_173:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_174:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_175:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_176:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_177:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_178:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_179:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_180:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator 
[[ADDR_181:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_182:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_183:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_184:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_185:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_186:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_188:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_189:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_190:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_191:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_192:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_193:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_194:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_195:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_196:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_197:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_198:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_199:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_200:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_201:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_202:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_203:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_204:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_205:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_206:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_207:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_208:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_209:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | 
`-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_211:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_212:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_213:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr 
[[ADDR_214:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_215:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_216:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_217:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_218:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' 
{{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | 
| `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_220:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_221:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_222:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_227:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_228:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_229:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_231:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_232:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_233:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_162]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_163]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | 
| `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator 
[[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_211]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_212]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_213]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_214]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_215]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_216]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_217]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_218]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_164]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_165]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | 
| `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | 
| | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_191]] <> 'const unsigned long' {{.*}}Var [[ADDR_192]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_193]] <> 'const unsigned long' {{.*}}Var [[ADDR_194]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_195]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_196]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_197]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_198]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_199]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_200]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_201]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_202]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_203]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_204]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_205]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_206]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_207]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_208]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_209]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_166]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_167]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_168]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_171]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_172]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_173]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_174]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_157]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_175]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_176]] 'int' {{.*}}Var [[ADDR_169]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_177]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_178]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_180]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_181]] 'int' '<' +// 
CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_182]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_183]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_184]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_158]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_185]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_186]] 'int' {{.*}}Var [[ADDR_179]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_187]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_188]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_189]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_190]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_169]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_170]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_179]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_180]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_234:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_149]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_235:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_150]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_236:0x[a-z0-9]*]] line:23:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_237:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_238:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_239:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_240:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPCollapseClause [[ADDR_241:0x[a-z0-9]*]] +// CHECK-NEXT: | | `-ConstantExpr [[ADDR_242:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | |-value: Int 2 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_243:0x[a-z0-9]*]] 'int' 2 +// 
CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_244:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_245:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_247:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_248:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_249:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_250:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_251:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_252:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_253:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_254:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_255:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_256:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_257:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_258:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// 
CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_259:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_260:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_261:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_262:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_263:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_264:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_265:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_266:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_267:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_268:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator 
[[ADDR_269:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_270:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_271:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_272:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_273:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_274:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_275:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_276:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_277:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_278:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_279:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_280:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_281:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_282:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_283:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_284:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_285:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_286:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_287:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_288:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_289:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_290:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_291:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_292:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_293:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_294:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr [[ADDR_295:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_296:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_297:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | 
`-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_299:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_300:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_301:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr 
[[ADDR_302:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_303:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_304:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_305:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_306:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' 
{{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | 
| `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | 
| `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_308:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_309:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_310:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_311:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_312:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_316:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_317:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_318:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_319:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_320:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_321:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_250]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_251]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | 
| | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | 
| `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | 
| `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 
'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_299]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_300]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_301]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_302]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_303]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_304]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_305]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_306]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_252]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_253]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | 
`-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | 
| | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_279]] <> 'const unsigned long' {{.*}}Var [[ADDR_280]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_281]] <> 'const unsigned long' {{.*}}Var [[ADDR_282]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_283]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_284]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_285]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_286]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_287]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_288]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_289]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_290]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_291]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_292]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_293]] <> Implicit 30 +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_294]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_295]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_296]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_297]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_254]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_255]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_256]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_259]] 'int' '<' +// CHECK-NEXT: 
| | | | |-ImplicitCastExpr [[ADDR_260]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_261]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_262]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_245]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_263]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_264]] 'int' {{.*}}Var [[ADDR_257]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_265]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_266]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_268]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_269]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_270]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_271]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_272]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_273]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_274]] 'int' {{.*}}Var [[ADDR_267]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_275]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_276]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_277]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_278]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_257]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_258]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_267]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_268]] 'int' 0 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_322:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_237]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_323:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_238]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_324:0x[a-z0-9]*]] line:30:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_325:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_326:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_327:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_328:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForDirective [[ADDR_329:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPCollapseClause [[ADDR_330:0x[a-z0-9]*]] +// CHECK-NEXT: 
| `-ConstantExpr [[ADDR_331:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | |-value: Int 2 +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_332:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_333:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_334:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_335:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_336:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_337:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_338:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_339:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_340:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_341:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_342:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_343:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_344:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_345:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_346:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_347:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_348:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_349:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_350:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_351:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_352:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_353:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_354:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_355:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_356:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_357:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_358:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 
0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_359:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_361:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_362:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_363:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_364:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_365:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_366:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_367:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_368:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_369:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_370:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_371:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_372:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_373:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_374:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_375:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit .global_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_377:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_378:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_379:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_380:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_381:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_382:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_384:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_385:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_386:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_387:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_388:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_389:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_390:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_391:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_392:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_393:0x[a-z0-9]*]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_394:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_395:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_396:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_397:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr [[ADDR_398:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_399:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr [[ADDR_400:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 
'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator 
[[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_401:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_402:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_403:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_404:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_405:0x[a-z0-9]*]] col:1 implicit struct 
definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_406:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_407:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_408:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_409:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_410:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_411:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_412:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' 
{{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | 
| `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} 
-// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar 
[[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_413:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_415:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_416:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_417:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_418:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_419:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_420:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_421:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_422:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_423:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_424:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_425:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_426:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_427:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_428:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_429:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_430:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_340]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_341]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// 
CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} 
-// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar 
[[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | 
`-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_401]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_402]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_403]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_404]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_405]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_406]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_407]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_408]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_409]] col:5 
implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_410]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_411]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_412]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_342]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_343]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_356]] +// 
CHECK-NEXT: | | | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_365]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_379]] <> 'const unsigned long' {{.*}}Var [[ADDR_380]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_381]] <> 'const unsigned long' {{.*}}Var [[ADDR_382]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_383]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_384]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_385]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_386]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_387]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_388]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_389]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_390]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_391]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_392]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_393]] <> implicit 'const unsigned long' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_394]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_395]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_396]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_397]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_398]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_399]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_400]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_344]] <> nothrow +// CHECK-NEXT: | |-ForStmt [[ADDR_345]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_346]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_348]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 
'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_349]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_350]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_351]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_352]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_334]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_353]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_354]] 'int' {{.*}}Var [[ADDR_347]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_355]] +// CHECK-NEXT: | | |-DeclStmt [[ADDR_356]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_358]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_359]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_361]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_362]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_335]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_363]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_364]] 'int' {{.*}}Var [[ADDR_357]] 'i' 'int' +// CHECK-NEXT: | | `-ForStmt [[ADDR_365]] +// 
CHECK-NEXT: | | |-DeclStmt [[ADDR_366]] +// CHECK-NEXT: | | | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_368]] 'int' 0 // CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_369]] 'int' '<' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_370]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_371]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_372]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_336]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' +// CHECK-NEXT: | | |-UnaryOperator [[ADDR_373]] 'int' postfix '++' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_374]] 'int' {{.*}}Var [[ADDR_367]] 'i' 'int' +// CHECK-NEXT: | | `-NullStmt [[ADDR_375]] +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_376]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_377]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_378]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' +// CHECK-NEXT: | |-VarDecl [[ADDR_347]] col:12 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_348]] 'int' 0 +// CHECK-NEXT: | |-VarDecl [[ADDR_357]] col:14 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_358]] 'int' 0 +// CHECK-NEXT: | `-VarDecl [[ADDR_367]] col:16 used i 'int' cinit +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_368]] 'int' 0 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_431:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_325]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_432:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_326]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_433:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_327]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c +++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c @@ -40,2144 +40,2143 @@ ; } -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl 
{{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:8:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_12:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_13:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_14:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_16:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_17:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_18:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_19:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_20:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_22:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_24:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_28:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_29:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_33:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_34:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_37:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_38:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_41:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | 
|-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_44:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_46:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_47:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix 
'++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_49:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_50:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_51:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_52:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_55:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_56:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_57:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_58:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_60:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_61:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_63:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_64:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_65:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_66:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_67:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_68:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_69:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_70:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_71:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_72:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_73:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_10]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_11]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix 
'++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_43]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_44]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_45]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_46]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_47]] col:23 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// 
CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_42]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | 
`-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' +// CHECK-NEXT: | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_48]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_49]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_50]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_51]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_52]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_53]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_54]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_55]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_56]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_57]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_58]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_60]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_61]] <> 'int' 1 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_74:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_75:0x[a-z0-9]*]] line:10:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_76:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_77:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_78:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_80:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_82:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_83:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_84:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_85:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_86:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_88:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_89:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_91:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_92:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_93:0x[a-z0-9]*]] 
+// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_94:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_95:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_96:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_97:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_99:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_100:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_101:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_102:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_103:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_104:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_105:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | 
`-IntegerLiteral [[ADDR_106:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_107:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_108:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_109:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_110:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_111:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_112:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_113:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_114:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_115:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_116:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_117:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_118:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_119:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_120:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_121:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_126:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_127:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_129:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_130:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_131:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_132:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | 
| | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_133:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_137:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_138:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_139:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | 
`-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: 
| | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 
'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator 
[[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_140:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_141:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_142:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_143:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_144:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_145:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_148:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_149:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_150:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_151:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_152:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_153:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_154:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_155:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_156:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_157:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_158:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_159:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_160:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_161:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_162:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_163:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_164:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_165:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_166:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_167:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_168:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_86]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_87]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_88]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | 
| `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// 
CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// 
CHECK-NEXT: | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_133]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_134]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_136]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_137]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_138]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_139]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var 
{{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | 
|-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_131]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | 
`-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | 
|-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_140]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_141]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_142]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_143]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_144]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_145]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_146]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_147]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_148]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_149]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_150]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_152]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_153]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_169:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_171:0x[a-z0-9]*]] line:18:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_172:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_173:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_174:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_175:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_176:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_178:0x[a-z0-9]*]] 'int' {{.*}}ParmVar 
[[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_179:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_180:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_181:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_182:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_184:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_185:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_186:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_188:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_189:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_190:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_191:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_192:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_193:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_194:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | 
`-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_195:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_196:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_198:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_200:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_202:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_203:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_204:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_205:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_206:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_207:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_208:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_209:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_211:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_212:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_213:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_214:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_215:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_216:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_217:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_218:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_220:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_221:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_222:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_227:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_228:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_229:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_231:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_232:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_233:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_234:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_235:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_236:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_237:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_238:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | 
| | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 
'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var 
[[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: 
| | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_239:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_240:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_241:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_242:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_243:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_244:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_245:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_247:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_248:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_249:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_250:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_251:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_252:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_253:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_254:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_255:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_256:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_257:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_258:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_259:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_260:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_261:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_262:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_263:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_264:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_265:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_266:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_267:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_182]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_183]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_184]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_185]] 'int' +// CHECK-NEXT: | | | | |-value: Int 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_186]] 'int' 1 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_187]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' 
postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// 
CHECK-NEXT: | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_232]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_233]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_234]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_235]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_236]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_237]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_238]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | 
| | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_230]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | 
`-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | 
|-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_239]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_240]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_241]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_242]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_243]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_244]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_245]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_247]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_248]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_249]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_251]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_252]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_268:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_269:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_270:0x[a-z0-9]*]] line:26:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_271:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_272:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_273:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_274:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_275:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_276:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_277:0x[a-z0-9]*]] 'int' {{.*}}ParmVar 
[[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_278:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_279:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_280:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_281:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_282:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_283:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_284:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_285:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_286:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_287:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_288:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_289:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_290:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_291:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_292:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_293:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | 
`-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_294:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_295:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_296:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_297:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_299:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_300:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_301:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_302:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_303:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_304:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_305:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_306:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_308:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_309:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_310:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_311:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_312:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_316:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_317:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_318:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_319:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_320:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_321:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_322:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_323:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_324:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_325:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_326:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_327:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_328:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_329:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_330:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_331:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_332:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_333:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_334:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_335:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_336:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_337:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | 
| | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 
'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var 
[[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_338:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_339:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_340:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_341:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_342:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_343:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_344:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | | |-ParenExpr [[ADDR_345:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_346:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_347:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_348:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | | `-ParenExpr [[ADDR_349:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_350:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_351:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_352:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_353:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_354:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_355:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_356:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_357:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_358:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_359:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_361:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_362:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_363:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_364:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_365:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_366:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_367:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_368:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_369:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_370:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_371:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_372:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_373:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_374:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_375:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_377:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_378:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_379:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_380:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_381:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_281]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_282]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_283]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_284]] 'int' +// CHECK-NEXT: | | | | |-value: Int 2 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_285]] 'int' 2 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_286]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' 
postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// 
CHECK-NEXT: | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_331]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_332]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_333]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_334]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_335]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_336]] col:23 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_337]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | 
| | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_329]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_330]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | 
`-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} 
-// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_338]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_339]] col:25 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_340]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_341]] > 'long' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_342]] 'long' '*' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_343]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_344]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_345]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_346]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_347]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_348]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_349]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_350]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_351]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_353]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_354]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_355]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_356]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_357]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_358]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_359]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_361]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_362]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_364]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_365]] <> 'long' +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_366]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_382:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_384:0x[a-z0-9]*]] line:34:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_385:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_386:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_387:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_388:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetDirective [[ADDR_389:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_390:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_391:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_392:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_393:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_394:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_395:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_396:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_397:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_398:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-OMPCollapseClause [[ADDR_399:0x[a-z0-9]*]] +// 
CHECK-NEXT: | | | | | `-ConstantExpr [[ADDR_400:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_401:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | `-CapturedStmt [[ADDR_402:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_403:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_404:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_405:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_406:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_407:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_408:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_409:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_410:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_411:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_412:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_413:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | 
| | | `-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_415:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_416:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_417:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_418:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_419:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_420:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_421:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_422:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_423:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_424:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_425:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_426:0x[a-z0-9]*]] 'int' postfix 
'++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_427:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_428:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_429:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_430:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_431:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_432:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_433:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_434:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_435:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_436:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_437:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_438:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_439:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_440:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_441:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_442:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_443:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_444:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_445:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_446:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_447:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_448:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_449:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_450:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_451:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_452:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_453:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_454:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_455:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_456:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_457:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_458:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_459:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | 
|-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | 
| | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | 
| | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_460:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_461:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_462:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_463:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_464:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_465:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_466:0x[a-z0-9]*]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl 
[[ADDR_467:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_468:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt 
[[ADDR_417]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 
'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | 
| |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// 
CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_469:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_470:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-OMPCapturedExprDecl [[ADDR_471:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_472:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_473:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_474:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_475:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_476:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_477:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_478:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_479:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_480:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_481:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_482:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_483:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_484:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_485:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_486:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_487:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_488:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_489:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_490:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_491:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_492:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_493:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_494:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_495:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_496:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_497:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_498:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_499:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_500:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_501:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_502:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_503:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_504:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_505:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_506:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_507:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_508:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_509:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_510:0x[a-z0-9]*]] col:23 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_511:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_512:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_513:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_514:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_515:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_397]] <> nothrow +// CHECK-NEXT: | |-OMPTeamsDistributeParallelForSimdDirective [[ADDR_398]] +// CHECK-NEXT: | | |-OMPCollapseClause [[ADDR_399]] +// CHECK-NEXT: | | | `-ConstantExpr [[ADDR_400]] 'int' +// CHECK-NEXT: | | | |-value: Int 2 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_401]] 'int' 2 +// CHECK-NEXT: | | `-CapturedStmt [[ADDR_402]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: 
| | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// 
CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_436]] 'int' 
{{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_460]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_461]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_462]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_463]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_464]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_465]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_466]] col:23 implicit 'int &' +// CHECK-NEXT: | | |-FieldDecl [[ADDR_467]] col:25 implicit 'int &' +// CHECK-NEXT: | | `-FieldDecl [[ADDR_468]] col:27 implicit 'int &' +// CHECK-NEXT: | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// 
CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_457]] col:23 implicit 'int &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_458]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// 
CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_469]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_470]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-OMPCapturedExprDecl [[ADDR_471]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | `-BinaryOperator [[ADDR_472]] > 'long' '-' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_473]] 'long' '*' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_474]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_475]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_476]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_477]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_478]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_479]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_480]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_481]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_482]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_484]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_485]] 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_486]] 'int' '/' +// CHECK-NEXT: | | |-ParenExpr [[ADDR_487]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_488]] 'int' '-' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_489]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_490]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | `-ParenExpr [[ADDR_491]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_492]] > 'int' '+' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_493]] 'int' '-' +// CHECK-NEXT: | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_495]] <> 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_496]] <> 'long' +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_497]] <> 'int' 1 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_516:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_517:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_518:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c +++ b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c @@ -40,2144 +40,2143 @@ ; } -// 
CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:3:1, line:8:1> line:3:6 test_one 'void (int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK: |-FunctionDecl [[ADDR_0:0x[a-z0-9]*]] <{{.*}}, line:8:1> line:3:6 test_one 'void (int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_1:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_2:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_3:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_4:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_5:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_6:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_7:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_8:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_9:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_10:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | 
`-CapturedStmt [[ADDR_11:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_12:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_13:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_14:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_15:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_16:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_17:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_18:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_19:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_20:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_21:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_22:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_23:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_24:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_25:0x[a-z0-9]*]] 'int' 
{{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_26:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_27:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_28:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_29:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_30:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_31:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_32:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_33:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_34:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_35:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_36:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_37:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_38:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_39:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_40:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_41:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_42:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl 
[[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_43:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_44:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_45:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_46:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_47:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | 
| | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_48:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_49:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_50:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_51:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_52:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_53:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_54:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_55:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_56:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_57:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_58:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_59:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_60:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_61:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_62:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_63:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_64:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_65:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_66:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_67:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_68:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_69:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_70:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_71:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_72:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_73:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_9]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_10]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_11]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_43]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_44]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_45]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_46]] <> Implicit +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_47]] col:3 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_12]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_13]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
[[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_30]] <> 'const unsigned long' {{.*}}Var [[ADDR_31]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_32]] <> 'const unsigned long' {{.*}}Var [[ADDR_33]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_34]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_35]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_36]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_37]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_38]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_39]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_40]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_41]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_42]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_14]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_15]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_16]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_19]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_20]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_21]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_24]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_25]] 'int' {{.*}}Var [[ADDR_17]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_26]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_27]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_28]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_29]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' +// CHECK-NEXT: | | | `-VarDecl [[ADDR_17]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_48]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_22]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_23]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_49]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_50]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_51]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_52]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_53]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_54]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_55]] 'int' {{.*}}OMPCapturedExpr [[ADDR_48]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_56]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_57]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_58]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_18]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_60]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_59]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_61]] <> 'int' 1 +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_74:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_1]] 'x' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_75:0x[a-z0-9]*]] line:10:6 test_two 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_76:0x[a-z0-9]*]] col:19 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_77:0x[a-z0-9]*]] col:26 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_78:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_79:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_80:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_81:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_82:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_83:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_84:0x[a-z0-9]*]] <> 
nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_85:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_86:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_87:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_88:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_89:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_90:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_91:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_92:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_93:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_94:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_95:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_96:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_97:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_98:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | 
| `-ImplicitCastExpr [[ADDR_99:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_100:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_101:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_102:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_103:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_104:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_105:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_106:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_107:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_108:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_109:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr 
[[ADDR_110:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_111:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_112:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_113:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_114:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_115:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_116:0x[a-z0-9]*]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_117:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_118:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_119:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_120:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_121:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_122:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_123:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_124:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_125:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_126:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_127:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_128:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_129:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_130:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_131:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_132:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | 
| | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | 
| | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_133:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_134:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_135:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_136:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_137:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_138:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_139:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral 
[[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | 
| `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 
'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator 
[[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_140:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_141:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_142:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_143:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_144:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_145:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_146:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_147:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_148:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_149:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_150:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_151:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_152:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_153:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_154:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_155:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_156:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_157:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_158:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_159:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_160:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_161:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_162:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_163:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_164:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_165:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_166:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_167:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_168:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_86]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_87]] +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_88]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | 
`-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: 
| | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr 
[[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | 
| `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_133]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_134]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_135]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_136]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_137]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_138]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_139]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_89]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_90]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_118]] <> 'const unsigned long' {{.*}}Var [[ADDR_119]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_120]] <> 'const unsigned long' {{.*}}Var [[ADDR_121]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_122]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_123]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_124]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_125]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_126]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_127]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_128]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_129]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_130]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_131]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_132]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_91]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_92]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_93]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_96]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_97]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_98]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr 
[[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_101]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_102]] 'int' {{.*}}Var [[ADDR_94]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_103]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_104]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | 
|-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_107]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_108]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_109]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_110]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_111]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_112]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_113]] 'int' {{.*}}Var [[ADDR_105]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_114]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_115]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_116]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_117]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_94]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_105]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_106]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_140]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_99]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_100]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_141]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_142]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_143]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_144]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_145]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_146]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_147]] 'int' {{.*}}OMPCapturedExpr [[ADDR_140]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_148]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_149]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_150]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_95]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_152]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_151]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_153]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_169:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_76]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_170:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_77]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_171:0x[a-z0-9]*]] line:18:6 test_three 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_172:0x[a-z0-9]*]] col:21 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_173:0x[a-z0-9]*]] col:28 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_174:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_175:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_176:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_177:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_178:0x[a-z0-9]*]] 'int' {{.*}}ParmVar 
[[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_179:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_180:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_181:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_182:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_183:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_184:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_185:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_186:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_187:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_188:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_189:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_190:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_191:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_192:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_193:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_194:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl 
{{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_195:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_196:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_197:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_198:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_199:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_200:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_201:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_202:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_203:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_204:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_205:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_206:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_207:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_208:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_209:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_210:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_211:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_212:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_213:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_214:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_215:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_216:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_217:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_218:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_219:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_220:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_221:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_222:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_223:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_224:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_225:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_226:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_227:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_228:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_229:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_230:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_231:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | 
| | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | 
| | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_232:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_233:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_234:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_235:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_236:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_237:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_238:0x[a-z0-9]*]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | 
`-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var 
[[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | 
| | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_239:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_240:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_241:0x[a-z0-9]*]] > 'int' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_242:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_243:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_244:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_245:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_246:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_247:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_248:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_249:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_250:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_251:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_252:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_253:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_254:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_255:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_256:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_257:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_258:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_259:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_260:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_261:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_262:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_263:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_264:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_265:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_266:0x[a-z0-9]*]] col:25 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_267:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_182]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_183]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_184]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_185]] 'int' +// CHECK-NEXT: | | | | |-value: Int 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_186]] 'int' 1 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_187]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix 
'++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | 
| `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_232]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_233]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_234]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_235]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_236]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_237]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_238]] col:25 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_188]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_189]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_217]] <> 'const unsigned long' {{.*}}Var [[ADDR_218]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_219]] <> 'const unsigned long' {{.*}}Var [[ADDR_220]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_221]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_222]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_223]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_224]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_225]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_226]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_227]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_228]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_229]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_230]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_231]] col:25 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_190]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_191]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_192]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_195]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_196]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_197]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | | | 
`-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_200]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_201]] 'int' {{.*}}Var [[ADDR_193]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_202]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_203]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | 
|-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_206]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_207]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_208]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_209]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_210]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_211]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_212]] 'int' {{.*}}Var [[ADDR_204]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_213]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_214]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_215]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_216]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_193]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_204]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_205]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_239]] col:23 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_198]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_199]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_240]] > col:3 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_241]] > 'int' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_242]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_243]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_244]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_245]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_246]] 'int' {{.*}}OMPCapturedExpr [[ADDR_239]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_247]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_248]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_249]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_194]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_251]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_250]] 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_252]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_268:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_172]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_269:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_173]] 'y' 'int' +// CHECK-NEXT: |-FunctionDecl [[ADDR_270:0x[a-z0-9]*]] line:26:6 test_four 'void (int, int)' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_271:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: | |-ParmVarDecl [[ADDR_272:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: | `-CompoundStmt [[ADDR_273:0x[a-z0-9]*]] +// CHECK-NEXT: | `-OMPTargetDirective [[ADDR_274:0x[a-z0-9]*]] +// CHECK-NEXT: | |-OMPFirstprivateClause [[ADDR_275:0x[a-z0-9]*]] <> +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_276:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_277:0x[a-z0-9]*]] 'int' {{.*}}ParmVar 
[[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | `-CapturedStmt [[ADDR_278:0x[a-z0-9]*]] +// CHECK-NEXT: | |-CapturedDecl [[ADDR_279:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_280:0x[a-z0-9]*]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_281:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_282:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-OMPCollapseClause [[ADDR_283:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | `-ConstantExpr [[ADDR_284:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_285:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | | `-CapturedStmt [[ADDR_286:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_287:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | |-CapturedStmt [[ADDR_288:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-CapturedDecl [[ADDR_289:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | | |-ForStmt [[ADDR_290:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_291:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_292:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_293:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl 
{{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_294:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_295:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_296:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_297:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_298:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_299:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_300:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ForStmt [[ADDR_301:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | |-DeclStmt [[ADDR_302:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | | `-VarDecl [[ADDR_303:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral [[ADDR_304:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | | |-BinaryOperator [[ADDR_305:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr [[ADDR_306:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr [[ADDR_307:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr [[ADDR_308:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_309:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | | |-UnaryOperator [[ADDR_310:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_311:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-NullStmt [[ADDR_312:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_313:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_314:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl [[ADDR_315:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_316:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_317:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_318:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_319:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | | |-DeclRefExpr [[ADDR_320:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_321:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_322:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_323:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_324:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-RecordDecl [[ADDR_325:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr [[ADDR_326:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_327:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_328:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | | |-FieldDecl [[ADDR_329:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | | `-FieldDecl [[ADDR_330:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | 
| | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | 
| | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_331:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_332:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_333:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_334:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_335:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_336:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_337:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | 
`-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var 
[[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' 
{{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_338:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-OMPCapturedExprDecl [[ADDR_339:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | `-OMPCapturedExprDecl [[ADDR_340:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_341:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_342:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_343:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_344:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | | |-ParenExpr [[ADDR_345:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_346:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_347:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_348:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | | `-ParenExpr [[ADDR_349:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-BinaryOperator [[ADDR_350:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_351:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_352:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_353:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_354:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_355:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_356:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_357:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_358:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_359:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_360:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_361:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_362:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_363:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_364:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_365:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_366:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_367:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_368:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-AlwaysInlineAttr [[ADDR_369:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_370:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_371:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_372:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_373:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_374:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_375:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_376:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_377:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_378:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | | `-OMPCaptureKindAttr [[ADDR_379:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_380:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_381:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_281]] <> nothrow +// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective [[ADDR_282]] +// CHECK-NEXT: | | | |-OMPCollapseClause [[ADDR_283]] +// CHECK-NEXT: | | | | `-ConstantExpr [[ADDR_284]] 'int' +// CHECK-NEXT: | | | | |-value: Int 2 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_285]] 'int' 2 +// CHECK-NEXT: | | | `-CapturedStmt [[ADDR_286]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' 
-// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | 
| `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_331]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_332]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_333]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_334]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_335]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_336]] col:3 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_337]] col:5 implicit 'int &' +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_287]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_288]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl 
{{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_316]] <> 'const unsigned long' {{.*}}Var [[ADDR_317]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_318]] <> 'const unsigned long' {{.*}}Var [[ADDR_319]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_320]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_321]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_322]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_323]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_324]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_325]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_326]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_327]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_328]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_329]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_330]] col:5 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_289]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_290]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_291]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_294]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_295]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_296]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | | | 
`-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_299]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_300]] 'int' {{.*}}Var [[ADDR_292]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_301]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_302]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_305]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_306]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_307]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_310]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_311]] 'int' {{.*}}Var [[ADDR_303]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_312]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_313]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_314]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_315]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_292]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_303]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_338]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_297]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_298]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | | |-OMPCapturedExprDecl [[ADDR_339]] col:25 implicit used .capture_expr. 
'int' +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_308]] 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_309]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: | | `-OMPCapturedExprDecl [[ADDR_340]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_341]] > 'long' '-' +// CHECK-NEXT: | | |-BinaryOperator [[ADDR_342]] 'long' '*' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_343]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_344]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_345]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_346]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_347]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_348]] 'int' {{.*}}OMPCapturedExpr [[ADDR_338]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_349]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_350]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_351]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_293]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_353]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_352]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_354]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_355]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_356]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_357]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_358]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_359]] 'int' {{.*}}OMPCapturedExpr [[ADDR_339]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_360]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_361]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_362]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_304]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_364]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_363]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_365]] <> 'long' +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_366]] <> 'int' 1 +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_382:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_271]] 'x' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_383:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_272]] 'y' 'int' +// CHECK-NEXT: `-FunctionDecl [[ADDR_384:0x[a-z0-9]*]] line:34:6 test_five 'void (int, int, int)' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_385:0x[a-z0-9]*]] col:20 used x 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_386:0x[a-z0-9]*]] col:27 used y 'int' +// CHECK-NEXT: |-ParmVarDecl [[ADDR_387:0x[a-z0-9]*]] col:34 used z 'int' +// CHECK-NEXT: `-CompoundStmt [[ADDR_388:0x[a-z0-9]*]] +// CHECK-NEXT: `-OMPTargetDirective [[ADDR_389:0x[a-z0-9]*]] +// CHECK-NEXT: |-OMPFirstprivateClause [[ADDR_390:0x[a-z0-9]*]] <> +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_391:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-DeclRefExpr [[ADDR_392:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-DeclRefExpr [[ADDR_393:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: `-CapturedStmt [[ADDR_394:0x[a-z0-9]*]] +// CHECK-NEXT: |-CapturedDecl [[ADDR_395:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | |-CapturedStmt [[ADDR_396:0x[a-z0-9]*]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_397:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForDirective [[ADDR_398:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-OMPCollapseClause [[ADDR_399:0x[a-z0-9]*]] +// 
CHECK-NEXT: | | | | | `-ConstantExpr [[ADDR_400:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | |-value: Int 2 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_401:0x[a-z0-9]*]] 'int' 2 +// CHECK-NEXT: | | | | `-CapturedStmt [[ADDR_402:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_403:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | |-CapturedStmt [[ADDR_404:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | |-CapturedDecl [[ADDR_405:0x[a-z0-9]*]] <> nothrow +// CHECK-NEXT: | | | | | | | |-ForStmt [[ADDR_406:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_407:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_408:0x[a-z0-9]*]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_409:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_410:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_411:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_412:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_413:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | 
| | | `-DeclRefExpr [[ADDR_414:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_415:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_416:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_417:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_418:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_419:0x[a-z0-9]*]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_420:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_421:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_422:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_423:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_424:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_425:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_426:0x[a-z0-9]*]] 'int' postfix 
'++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_427:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ForStmt [[ADDR_428:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | |-DeclStmt [[ADDR_429:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | | | `-VarDecl [[ADDR_430:0x[a-z0-9]*]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral [[ADDR_431:0x[a-z0-9]*]] 'int' 0 // CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | | |-BinaryOperator [[ADDR_432:0x[a-z0-9]*]] 'int' '<' +// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr [[ADDR_433:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr [[ADDR_434:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr [[ADDR_435:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_436:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | | |-UnaryOperator [[ADDR_437:0x[a-z0-9]*]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_438:0x[a-z0-9]*]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-NullStmt [[ADDR_439:0x[a-z0-9]*]] +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_440:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_441:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl [[ADDR_442:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_443:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_444:0x[a-z0-9]*]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_445:0x[a-z0-9]*]] <> 'const unsigned long' {{.*}}Var [[ADDR_446:0x[a-z0-9]*]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_447:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-DeclRefExpr [[ADDR_448:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_449:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_450:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_451:0x[a-z0-9]*]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_452:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-RecordDecl [[ADDR_453:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | | |-CapturedRecordAttr [[ADDR_454:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_455:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_456:0x[a-z0-9]*]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_457:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | | |-FieldDecl [[ADDR_458:0x[a-z0-9]*]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | | `-FieldDecl [[ADDR_459:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator 
[[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | 
|-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | 
`-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_460:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_461:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_462:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_463:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_464:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_465:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_466:0x[a-z0-9]*]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_467:0x[a-z0-9]*]] 
col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_468:0x[a-z0-9]*]] col:27 implicit 'int &' +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: 
| | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' 
-// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' 
{{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | 
|-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: 
| | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_469:0x[a-z0-9]*]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-OMPCapturedExprDecl [[ADDR_470:0x[a-z0-9]*]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-OMPCapturedExprDecl [[ADDR_471:0x[a-z0-9]*]] > col:3 implicit used .capture_expr. 
'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_472:0x[a-z0-9]*]] > 'long' '-' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_473:0x[a-z0-9]*]] 'long' '*' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_474:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_475:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | | |-ParenExpr [[ADDR_476:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_477:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_478:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_479:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | | | | `-ParenExpr [[ADDR_480:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-BinaryOperator [[ADDR_481:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_482:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_483:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_484:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_485:0x[a-z0-9]*]] 'long' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_486:0x[a-z0-9]*]] 'int' '/' +// CHECK-NEXT: | | | | |-ParenExpr [[ADDR_487:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_488:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_489:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_490:0x[a-z0-9]*]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | | `-ParenExpr [[ADDR_491:0x[a-z0-9]*]] 'int' +// CHECK-NEXT: | | | | | `-BinaryOperator [[ADDR_492:0x[a-z0-9]*]] > 'int' '+' +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_493:0x[a-z0-9]*]] 'int' '-' +// CHECK-NEXT: | | | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_494:0x[a-z0-9]*]] 'int' 1 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_495:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-ImplicitCastExpr [[ADDR_496:0x[a-z0-9]*]] <> 'long' +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_497:0x[a-z0-9]*]] <> 'int' 1 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_498:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_499:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_500:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-AlwaysInlineAttr [[ADDR_501:0x[a-z0-9]*]] <> Implicit __forceinline +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_502:0x[a-z0-9]*]] col:1 implicit .global_tid. 'const int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_503:0x[a-z0-9]*]] col:1 implicit .part_id. 'const int *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_504:0x[a-z0-9]*]] col:1 implicit .privates. 'void *const restrict' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_505:0x[a-z0-9]*]] col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_506:0x[a-z0-9]*]] col:1 implicit .task_t. 
'void *const' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_507:0x[a-z0-9]*]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_508:0x[a-z0-9]*]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_509:0x[a-z0-9]*]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_510:0x[a-z0-9]*]] col:3 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_511:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | |-FieldDecl [[ADDR_512:0x[a-z0-9]*]] col:5 implicit 'int' +// CHECK-NEXT: | | | `-OMPCaptureKindAttr [[ADDR_513:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | | `-FieldDecl [[ADDR_514:0x[a-z0-9]*]] col:27 implicit 'int' +// CHECK-NEXT: | | `-OMPCaptureKindAttr [[ADDR_515:0x[a-z0-9]*]] <> Implicit 30 +// CHECK-NEXT: | `-CapturedDecl [[ADDR_397]] <> nothrow +// CHECK-NEXT: | |-OMPTeamsDistributeParallelForDirective [[ADDR_398]] +// CHECK-NEXT: | | |-OMPCollapseClause [[ADDR_399]] +// CHECK-NEXT: | | | `-ConstantExpr [[ADDR_400]] 'int' +// CHECK-NEXT: | | | |-value: Int 2 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_401]] 'int' 2 +// CHECK-NEXT: | | `-CapturedStmt [[ADDR_402]] +// CHECK-NEXT: | | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 
'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | 
| `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | 
| | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar 
[[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 'const int *const restrict' +// CHECK-NEXT: | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_460]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | |-DeclRefExpr [[ADDR_461]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_462]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | |-ImplicitParamDecl [[ADDR_463]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' +// CHECK-NEXT: | |-RecordDecl [[ADDR_464]] col:1 implicit struct definition +// CHECK-NEXT: | | |-CapturedRecordAttr [[ADDR_465]] <> Implicit +// CHECK-NEXT: | | |-FieldDecl [[ADDR_466]] col:3 implicit 'int &' +// CHECK-NEXT: | | |-FieldDecl [[ADDR_467]] col:5 implicit 'int &' +// CHECK-NEXT: | | `-FieldDecl [[ADDR_468]] col:27 implicit 'int &' +// CHECK-NEXT: | |-CapturedDecl [[ADDR_403]] <> nothrow +// CHECK-NEXT: | | |-CapturedStmt [[ADDR_404]] +// CHECK-NEXT: | | | |-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | | | |-ForStmt 
[[ADDR_406]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' 
-// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_443]] <> 'const unsigned long' {{.*}}Var [[ADDR_444]] '.captured.omp.previous.lb' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_445]] <> 'const unsigned long' {{.*}}Var [[ADDR_446]] '.captured.omp.previous.ub' 'const unsigned long' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_447]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-DeclRefExpr [[ADDR_448]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | `-DeclRefExpr [[ADDR_449]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_450]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_451]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_452]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | |-RecordDecl [[ADDR_453]] col:1 implicit struct definition +// CHECK-NEXT: | | | |-CapturedRecordAttr [[ADDR_454]] <> Implicit +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_455]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_456]] <> implicit 'const unsigned long &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_457]] col:3 implicit 'int &' +// CHECK-NEXT: | | | |-FieldDecl [[ADDR_458]] col:5 implicit 'int &' +// CHECK-NEXT: | | | `-FieldDecl [[ADDR_459]] col:27 implicit 'int &' +// CHECK-NEXT: | | `-CapturedDecl [[ADDR_405]] <> nothrow +// CHECK-NEXT: | | |-ForStmt [[ADDR_406]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_407]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_410]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_411]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_412]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | 
| | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_415]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_416]] 'int' {{.*}}Var [[ADDR_408]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_417]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_418]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_421]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_422]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_423]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_426]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_427]] 'int' {{.*}}Var [[ADDR_419]] 'i' 'int' +// CHECK-NEXT: | | | `-ForStmt [[ADDR_428]] +// CHECK-NEXT: | | | |-DeclStmt [[ADDR_429]] +// CHECK-NEXT: | | | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_431]] 'int' 0 // CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 
'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_432]] 'int' '<' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_433]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_434]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | | `-ImplicitCastExpr [[ADDR_435]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_436]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' +// CHECK-NEXT: | | | |-UnaryOperator [[ADDR_437]] 'int' postfix '++' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_438]] 'int' {{.*}}Var [[ADDR_430]] 'i' 'int' +// CHECK-NEXT: | | | `-NullStmt [[ADDR_439]] +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_440]] col:1 implicit .global_tid. 'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_441]] col:1 implicit .bound_tid. 
'const int *const restrict' +// CHECK-NEXT: | | |-ImplicitParamDecl [[ADDR_442]] col:1 implicit __context 'struct ({{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' +// CHECK-NEXT: | | |-VarDecl [[ADDR_408]] col:12 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | |-VarDecl [[ADDR_419]] col:14 used i 'int' cinit +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | `-VarDecl [[ADDR_430]] col:16 used i 'int' cinit +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_431]] 'int' 0 +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_469]] col:23 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_413]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_414]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: | |-OMPCapturedExprDecl [[ADDR_470]] col:25 implicit used .capture_expr. 'int' +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_424]] 'int' +// CHECK-NEXT: | | `-DeclRefExpr [[ADDR_425]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: | `-OMPCapturedExprDecl [[ADDR_471]] > col:3 implicit used .capture_expr. 'long' +// CHECK-NEXT: | `-BinaryOperator [[ADDR_472]] > 'long' '-' +// CHECK-NEXT: | |-BinaryOperator [[ADDR_473]] 'long' '*' +// CHECK-NEXT: | | |-ImplicitCastExpr [[ADDR_474]] 'long' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_475]] 'int' '/' +// CHECK-NEXT: | | | |-ParenExpr [[ADDR_476]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_477]] 'int' '-' +// CHECK-NEXT: | | | | |-ImplicitCastExpr [[ADDR_478]] 'int' +// CHECK-NEXT: | | | | | `-DeclRefExpr [[ADDR_479]] 'int' {{.*}}OMPCapturedExpr [[ADDR_469]] '.capture_expr.' 
'int' +// CHECK-NEXT: | | | | `-ParenExpr [[ADDR_480]] 'int' +// CHECK-NEXT: | | | | `-BinaryOperator [[ADDR_481]] > 'int' '+' +// CHECK-NEXT: | | | | |-BinaryOperator [[ADDR_482]] 'int' '-' +// CHECK-NEXT: | | | | | |-IntegerLiteral [[ADDR_409]] 'int' 0 +// CHECK-NEXT: | | | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_484]] <> 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_483]] 'int' 1 +// CHECK-NEXT: | | `-ImplicitCastExpr [[ADDR_485]] 'long' +// CHECK-NEXT: | | `-BinaryOperator [[ADDR_486]] 'int' '/' +// CHECK-NEXT: | | |-ParenExpr [[ADDR_487]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_488]] 'int' '-' +// CHECK-NEXT: | | | |-ImplicitCastExpr [[ADDR_489]] 'int' +// CHECK-NEXT: | | | | `-DeclRefExpr [[ADDR_490]] 'int' {{.*}}OMPCapturedExpr [[ADDR_470]] '.capture_expr.' 'int' +// CHECK-NEXT: | | | `-ParenExpr [[ADDR_491]] 'int' +// CHECK-NEXT: | | | `-BinaryOperator [[ADDR_492]] > 'int' '+' +// CHECK-NEXT: | | | |-BinaryOperator [[ADDR_493]] 'int' '-' +// CHECK-NEXT: | | | | |-IntegerLiteral [[ADDR_420]] 'int' 0 +// CHECK-NEXT: | | | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | | | `-IntegerLiteral [[ADDR_495]] <> 'int' 1 +// CHECK-NEXT: | | `-IntegerLiteral [[ADDR_494]] 'int' 1 +// CHECK-NEXT: | `-ImplicitCastExpr [[ADDR_496]] <> 'long' +// CHECK-NEXT: | `-IntegerLiteral [[ADDR_497]] <> 'int' 1 +// CHECK-NEXT: |-DeclRefExpr [[ADDR_516:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_385]] 'x' 'int' +// CHECK-NEXT: |-DeclRefExpr [[ADDR_517:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_386]] 'y' 'int' +// CHECK-NEXT: `-DeclRefExpr [[ADDR_518:0x[a-z0-9]*]] 'int' {{.*}}ParmVar [[ADDR_387]] 'z' 'int' diff --git a/clang/test/OpenMP/align_clause_codegen.cpp b/clang/test/OpenMP/align_clause_codegen.cpp --- a/clang/test/OpenMP/align_clause_codegen.cpp +++ b/clang/test/OpenMP/align_clause_codegen.cpp @@ -99,14 +99,14 @@ // CHECK-32-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], 
i32 128, i32 720, ptr null) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO10__VOID_ADDR]], ptr null) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO9__VOID_ADDR]], ptr inttoptr (i32 8 to ptr)) -// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[MYALLOC]], align 4 -// CHECK-32-NEXT: [[CONV:%.*]] = inttoptr i32 [[TMP3]] to ptr +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = inttoptr i32 [[TMP1]] to ptr // CHECK-32-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 4, ptr [[CONV]]) -// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[MYALLOC]], align 4 -// CHECK-32-NEXT: [[CONV1:%.*]] = inttoptr i32 [[TMP4]] to ptr +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV1:%.*]] = inttoptr i32 [[TMP2]] to ptr // CHECK-32-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 40, ptr [[CONV1]]) -// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[MYALLOC]], align 4 -// CHECK-32-NEXT: [[CONV2:%.*]] = inttoptr i32 [[TMP5]] to ptr +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV2:%.*]] = inttoptr i32 [[TMP3]] to ptr // CHECK-32-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 4, i32 80, ptr [[CONV2]]) // CHECK-32-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, ptr null) // CHECK-32-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i32 16, i32 4, ptr null) @@ -114,14 +114,14 @@ // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR6__VOID_ADDR]], ptr null) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR5__VOID_ADDR]], ptr null) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR4__VOID_ADDR]], ptr null) -// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[MYALLOC]], align 4 
-// CHECK-32-NEXT: [[CONV3:%.*]] = inttoptr i32 [[TMP10]] to ptr +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV3:%.*]] = inttoptr i32 [[TMP4]] to ptr // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR3__VOID_ADDR]], ptr [[CONV3]]) -// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[MYALLOC]], align 4 -// CHECK-32-NEXT: [[CONV4:%.*]] = inttoptr i32 [[TMP12]] to ptr +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV4:%.*]] = inttoptr i32 [[TMP5]] to ptr // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR2__VOID_ADDR]], ptr [[CONV4]]) -// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[MYALLOC]], align 4 -// CHECK-32-NEXT: [[CONV5:%.*]] = inttoptr i32 [[TMP14]] to ptr +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[MYALLOC]], align 4 +// CHECK-32-NEXT: [[CONV5:%.*]] = inttoptr i32 [[TMP6]] to ptr // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR1__VOID_ADDR]], ptr [[CONV5]]) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO8__VOID_ADDR]], ptr null) // CHECK-32-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO7__VOID_ADDR]], ptr inttoptr (i32 8 to ptr)) @@ -174,14 +174,14 @@ // CHECK-NEXT: [[DOTFOO10__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 128, i64 720, ptr null) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO10__VOID_ADDR]], ptr null) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO9__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) -// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MYALLOC]], align 8 -// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP3]] to ptr +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = inttoptr i64 [[TMP1]] to ptr // CHECK-NEXT: [[DOTBAR1__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 4, ptr [[CONV]]) -// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[MYALLOC]], 
align 8 -// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP4]] to ptr +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV1:%.*]] = inttoptr i64 [[TMP2]] to ptr // CHECK-NEXT: [[DOTBAR2__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 48, ptr [[CONV1]]) -// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[MYALLOC]], align 8 -// CHECK-NEXT: [[CONV2:%.*]] = inttoptr i64 [[TMP5]] to ptr +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV2:%.*]] = inttoptr i64 [[TMP3]] to ptr // CHECK-NEXT: [[DOTBAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 4, i64 80, ptr [[CONV2]]) // CHECK-NEXT: [[DOTBAR4__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 8, ptr null) // CHECK-NEXT: [[DOTBAR5__VOID_ADDR:%.*]] = call ptr @__kmpc_aligned_alloc(i32 [[TMP0]], i64 16, i64 4, ptr null) @@ -189,14 +189,14 @@ // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR6__VOID_ADDR]], ptr null) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR5__VOID_ADDR]], ptr null) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR4__VOID_ADDR]], ptr null) -// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[MYALLOC]], align 8 -// CHECK-NEXT: [[CONV3:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV3:%.*]] = inttoptr i64 [[TMP4]] to ptr // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR3__VOID_ADDR]], ptr [[CONV3]]) -// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[MYALLOC]], align 8 -// CHECK-NEXT: [[CONV4:%.*]] = inttoptr i64 [[TMP12]] to ptr +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV4:%.*]] = inttoptr i64 [[TMP5]] to ptr // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR2__VOID_ADDR]], ptr [[CONV4]]) -// CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[MYALLOC]], align 8 -// CHECK-NEXT: 
[[CONV5:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[MYALLOC]], align 8 +// CHECK-NEXT: [[CONV5:%.*]] = inttoptr i64 [[TMP6]] to ptr // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTBAR1__VOID_ADDR]], ptr [[CONV5]]) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO8__VOID_ADDR]], ptr null) // CHECK-NEXT: call void @__kmpc_free(i32 [[TMP0]], ptr [[DOTFOO7__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp --- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp +++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp @@ -20,7 +20,7 @@ #pragma omp end declare target #endif -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_.*_.*}}_A_l19_ctor +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_A_l19_ctor // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: call void @_ZN1SC1Ev(ptr noundef nonnull align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @A to ptr)) #[[ATTR3:[0-9]+]] @@ -38,7 +38,7 @@ // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_.*_.*}}_A_l19_dtor +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_A_l19_dtor // CHECK-SAME: () #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 4 dereferenceable(4) addrspacecast (ptr addrspace(1) @A to ptr)) #[[ATTR4:[0-9]+]] diff --git a/clang/test/OpenMP/atomic_compare_codegen.cpp b/clang/test/OpenMP/atomic_compare_codegen.cpp --- a/clang/test/OpenMP/atomic_compare_codegen.cpp +++ b/clang/test/OpenMP/atomic_compare_codegen.cpp @@ -6,7 +6,6 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -target-cpu core2 -fopenmp-simd -fopenmp-version=51 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple 
x86_64-apple-darwin10 -target-cpu core2 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=51 -x c -triple x86_64-apple-darwin10 -target-cpu core2 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER @@ -13808,8 +13807,7 @@ } #endif -// CHECK-LABEL: define {{[^@]+}}@foo -// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-LABEL: @foo( // CHECK-NEXT: entry: // CHECK-NEXT: [[CX:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[CE:%.*]] = alloca i8, align 1 @@ -16130,8 +16128,7 @@ // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@bar -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @bar( // CHECK-NEXT: entry: // CHECK-NEXT: [[CX:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[CV:%.*]] = alloca i8, align 1 @@ -25386,8 +25383,7 @@ // CHECK-NEXT: ret void // // -// CHECK-LABEL: define {{[^@]+}}@cxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @cxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[CX:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[CV:%.*]] = alloca i8, align 1 @@ -25583,8 +25579,7 @@ // CHECK-NEXT: ret i8 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@ucxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @ucxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[UCX:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[UCV:%.*]] = alloca i8, align 1 @@ -25780,8 +25775,7 @@ // CHECK-NEXT: ret i8 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@sxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @sxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[SX:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[SV:%.*]] = alloca i16, align 2 @@ -25977,8 +25971,7 @@ // CHECK-NEXT: ret i16 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@usxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @usxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[USX:%.*]] = alloca i16, align 2 // CHECK-NEXT: [[USV:%.*]] = alloca i16, align 2 @@ -26174,8 +26167,7 @@ // CHECK-NEXT: ret i16 
[[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@ixevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @ixevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[IX:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[IV:%.*]] = alloca i32, align 4 @@ -26371,8 +26363,7 @@ // CHECK-NEXT: ret i32 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@uixevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @uixevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[UIX:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[UIV:%.*]] = alloca i32, align 4 @@ -26568,8 +26559,7 @@ // CHECK-NEXT: ret i32 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@lxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @lxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[LX:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[LV:%.*]] = alloca i64, align 8 @@ -26765,8 +26755,7 @@ // CHECK-NEXT: ret i64 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@ulxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @ulxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[ULX:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[ULV:%.*]] = alloca i64, align 8 @@ -26962,8 +26951,7 @@ // CHECK-NEXT: ret i64 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@llxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @llxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[LLX:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[LLV:%.*]] = alloca i64, align 8 @@ -27159,8 +27147,7 @@ // CHECK-NEXT: ret i64 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@ullxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @ullxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[ULLX:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[ULLV:%.*]] = alloca i64, align 8 @@ -27356,8 +27343,7 @@ // CHECK-NEXT: ret i64 [[TMP132]] // // -// CHECK-LABEL: define {{[^@]+}}@fxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @fxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[FX:%.*]] = alloca float, align 4 // CHECK-NEXT: [[FV:%.*]] = alloca float, align 4 @@ -27589,8 +27575,7 @@ // CHECK-NEXT: ret float [[TMP168]] // // -// 
CHECK-LABEL: define {{[^@]+}}@dxevd -// CHECK-SAME: () #[[ATTR0]] { +// CHECK-LABEL: @dxevd( // CHECK-NEXT: entry: // CHECK-NEXT: [[DX:%.*]] = alloca double, align 8 // CHECK-NEXT: [[DV:%.*]] = alloca double, align 8 @@ -27820,3 +27805,34164 @@ // CHECK-NEXT: call void @__kmpc_flush(ptr @[[GLOB1]]) // CHECK-NEXT: [[TMP168:%.*]] = load double, ptr [[DV]], align 8 // CHECK-NEXT: ret double [[TMP168]] +// +// +// SIMD-ONLY0-LABEL: @foo( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[CX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[SX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[IX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[ID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[LX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLX:%.*]] = alloca i64, align 8 +// 
SIMD-ONLY0-NEXT: [[ULLE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[FX:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FE:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FD:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[DX:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DE:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DD:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = sext i8 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = sext i8 [[TMP3]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: [[CONV5:%.*]] = trunc i32 [[COND]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV5]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV7:%.*]] = sext i8 [[TMP5]] to i32 +// SIMD-ONLY0-NEXT: [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]] +// SIMD-ONLY0: cond.true10: +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i8, ptr 
[[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sext i8 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false12: +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV13:%.*]] = sext i8 [[TMP7]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ] +// SIMD-ONLY0-NEXT: [[CONV16:%.*]] = trunc i32 [[COND15]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV16]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV17:%.*]] = sext i8 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV18:%.*]] = sext i8 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[CONV17]], [[CONV18]] +// SIMD-ONLY0-NEXT: br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true21: +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = sext i8 [[TMP10]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV24:%.*]] = sext i8 [[TMP11]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25]] +// SIMD-ONLY0: cond.end25: +// SIMD-ONLY0-NEXT: [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = trunc i32 [[COND26]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV27]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = sext i8 [[TMP12]] to i32 +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV29:%.*]] = sext i8 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: [[CMP30:%.*]] = icmp slt 
i32 [[CONV28]], [[CONV29]] +// SIMD-ONLY0-NEXT: br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = sext i8 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36:%.*]] +// SIMD-ONLY0: cond.false34: +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV35:%.*]] = sext i8 [[TMP15]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36]] +// SIMD-ONLY0: cond.end36: +// SIMD-ONLY0-NEXT: [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ] +// SIMD-ONLY0-NEXT: [[CONV38:%.*]] = trunc i32 [[COND37]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV38]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = sext i8 [[TMP16]] to i32 +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = sext i8 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// SIMD-ONLY0: if.then: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP18]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END]] +// SIMD-ONLY0: if.end: +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV43:%.*]] = sext i8 [[TMP19]] to i32 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV44:%.*]] = sext i8 [[TMP20]] to i32 +// SIMD-ONLY0-NEXT: [[CMP45:%.*]] = icmp slt i32 [[CONV43]], [[CONV44]] +// SIMD-ONLY0-NEXT: br i1 [[CMP45]], label [[IF_THEN47:%.*]], label [[IF_END48:%.*]] +// SIMD-ONLY0: if.then47: +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP21]], ptr [[CX]], align 1 
+// SIMD-ONLY0-NEXT: br label [[IF_END48]] +// SIMD-ONLY0: if.end48: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV49:%.*]] = sext i8 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV50:%.*]] = sext i8 [[TMP23]] to i32 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp sgt i32 [[CONV49]], [[CONV50]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[IF_THEN53:%.*]], label [[IF_END54:%.*]] +// SIMD-ONLY0: if.then53: +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP24]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END54]] +// SIMD-ONLY0: if.end54: +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV55:%.*]] = sext i8 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV56:%.*]] = sext i8 [[TMP26]] to i32 +// SIMD-ONLY0-NEXT: [[CMP57:%.*]] = icmp slt i32 [[CONV55]], [[CONV56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP57]], label [[IF_THEN59:%.*]], label [[IF_END60:%.*]] +// SIMD-ONLY0: if.then59: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP27]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END60]] +// SIMD-ONLY0: if.end60: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV61:%.*]] = sext i8 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV62:%.*]] = sext i8 [[TMP29]] to i32 +// SIMD-ONLY0-NEXT: [[CMP63:%.*]] = icmp eq i32 [[CONV61]], [[CONV62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]] +// SIMD-ONLY0: cond.true65: +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV66:%.*]] = sext i8 [[TMP30]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: 
cond.false67: +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV68:%.*]] = sext i8 [[TMP31]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ] +// SIMD-ONLY0-NEXT: [[CONV71:%.*]] = trunc i32 [[COND70]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV71]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV72:%.*]] = sext i8 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV73:%.*]] = sext i8 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: [[CMP74:%.*]] = icmp eq i32 [[CONV72]], [[CONV73]] +// SIMD-ONLY0-NEXT: br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true76: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV77:%.*]] = sext i8 [[TMP34]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV79:%.*]] = sext i8 [[TMP35]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80]] +// SIMD-ONLY0: cond.end80: +// SIMD-ONLY0-NEXT: [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = trunc i32 [[COND81]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV82]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = sext i8 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV84:%.*]] = sext i8 [[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]] +// SIMD-ONLY0-NEXT: br i1 [[CMP85]], label [[IF_THEN87:%.*]], label [[IF_END88:%.*]] +// SIMD-ONLY0: if.then87: 
+// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP38]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END88]] +// SIMD-ONLY0: if.end88: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV89:%.*]] = sext i8 [[TMP39]] to i32 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV90:%.*]] = sext i8 [[TMP40]] to i32 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp eq i32 [[CONV89]], [[CONV90]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[IF_THEN93:%.*]], label [[IF_END94:%.*]] +// SIMD-ONLY0: if.then93: +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP41]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END94]] +// SIMD-ONLY0: if.end94: +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV95:%.*]] = zext i8 [[TMP42]] to i32 +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV96:%.*]] = zext i8 [[TMP43]] to i32 +// SIMD-ONLY0-NEXT: [[CMP97:%.*]] = icmp sgt i32 [[CONV95]], [[CONV96]] +// SIMD-ONLY0-NEXT: br i1 [[CMP97]], label [[COND_TRUE99:%.*]], label [[COND_FALSE101:%.*]] +// SIMD-ONLY0: cond.true99: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV100:%.*]] = zext i8 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END103:%.*]] +// SIMD-ONLY0: cond.false101: +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV102:%.*]] = zext i8 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END103]] +// SIMD-ONLY0: cond.end103: +// SIMD-ONLY0-NEXT: [[COND104:%.*]] = phi i32 [ [[CONV100]], [[COND_TRUE99]] ], [ [[CONV102]], [[COND_FALSE101]] ] +// SIMD-ONLY0-NEXT: [[CONV105:%.*]] = trunc i32 [[COND104]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV105]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load 
i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = zext i8 [[TMP46]] to i32 +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV107:%.*]] = zext i8 [[TMP47]] to i32 +// SIMD-ONLY0-NEXT: [[CMP108:%.*]] = icmp slt i32 [[CONV106]], [[CONV107]] +// SIMD-ONLY0-NEXT: br i1 [[CMP108]], label [[COND_TRUE110:%.*]], label [[COND_FALSE112:%.*]] +// SIMD-ONLY0: cond.true110: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV111:%.*]] = zext i8 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false112: +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV113:%.*]] = zext i8 [[TMP49]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i32 [ [[CONV111]], [[COND_TRUE110]] ], [ [[CONV113]], [[COND_FALSE112]] ] +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = trunc i32 [[COND115]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV116]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV117:%.*]] = zext i8 [[TMP50]] to i32 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV118:%.*]] = zext i8 [[TMP51]] to i32 +// SIMD-ONLY0-NEXT: [[CMP119:%.*]] = icmp sgt i32 [[CONV117]], [[CONV118]] +// SIMD-ONLY0-NEXT: br i1 [[CMP119]], label [[COND_TRUE121:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true121: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV122:%.*]] = zext i8 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END125:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV124:%.*]] = zext i8 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END125]] +// SIMD-ONLY0: cond.end125: +// SIMD-ONLY0-NEXT: 
[[COND126:%.*]] = phi i32 [ [[CONV122]], [[COND_TRUE121]] ], [ [[CONV124]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = trunc i32 [[COND126]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV127]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = zext i8 [[TMP54]] to i32 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV129:%.*]] = zext i8 [[TMP55]] to i32 +// SIMD-ONLY0-NEXT: [[CMP130:%.*]] = icmp slt i32 [[CONV128]], [[CONV129]] +// SIMD-ONLY0-NEXT: br i1 [[CMP130]], label [[COND_TRUE132:%.*]], label [[COND_FALSE134:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV133:%.*]] = zext i8 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END136:%.*]] +// SIMD-ONLY0: cond.false134: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV135:%.*]] = zext i8 [[TMP57]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END136]] +// SIMD-ONLY0: cond.end136: +// SIMD-ONLY0-NEXT: [[COND137:%.*]] = phi i32 [ [[CONV133]], [[COND_TRUE132]] ], [ [[CONV135]], [[COND_FALSE134]] ] +// SIMD-ONLY0-NEXT: [[CONV138:%.*]] = trunc i32 [[COND137]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV138]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = zext i8 [[TMP58]] to i32 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV140:%.*]] = zext i8 [[TMP59]] to i32 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp sgt i32 [[CONV139]], [[CONV140]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[IF_THEN143:%.*]], label [[IF_END144:%.*]] +// SIMD-ONLY0: if.then143: +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP60]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END144]] +// SIMD-ONLY0: 
if.end144: +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = zext i8 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV146:%.*]] = zext i8 [[TMP62]] to i32 +// SIMD-ONLY0-NEXT: [[CMP147:%.*]] = icmp slt i32 [[CONV145]], [[CONV146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP147]], label [[IF_THEN149:%.*]], label [[IF_END150:%.*]] +// SIMD-ONLY0: if.then149: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP63]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END150]] +// SIMD-ONLY0: if.end150: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV151:%.*]] = zext i8 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV152:%.*]] = zext i8 [[TMP65]] to i32 +// SIMD-ONLY0-NEXT: [[CMP153:%.*]] = icmp sgt i32 [[CONV151]], [[CONV152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP153]], label [[IF_THEN155:%.*]], label [[IF_END156:%.*]] +// SIMD-ONLY0: if.then155: +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP66]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END156]] +// SIMD-ONLY0: if.end156: +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV157:%.*]] = zext i8 [[TMP67]] to i32 +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV158:%.*]] = zext i8 [[TMP68]] to i32 +// SIMD-ONLY0-NEXT: [[CMP159:%.*]] = icmp slt i32 [[CONV157]], [[CONV158]] +// SIMD-ONLY0-NEXT: br i1 [[CMP159]], label [[IF_THEN161:%.*]], label [[IF_END162:%.*]] +// SIMD-ONLY0: if.then161: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP69]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END162]] +// SIMD-ONLY0: if.end162: +// SIMD-ONLY0-NEXT: 
[[TMP70:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV163:%.*]] = zext i8 [[TMP70]] to i32 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV164:%.*]] = zext i8 [[TMP71]] to i32 +// SIMD-ONLY0-NEXT: [[CMP165:%.*]] = icmp eq i32 [[CONV163]], [[CONV164]] +// SIMD-ONLY0-NEXT: br i1 [[CMP165]], label [[COND_TRUE167:%.*]], label [[COND_FALSE169:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV168:%.*]] = zext i8 [[TMP72]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END171:%.*]] +// SIMD-ONLY0: cond.false169: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = zext i8 [[TMP73]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END171]] +// SIMD-ONLY0: cond.end171: +// SIMD-ONLY0-NEXT: [[COND172:%.*]] = phi i32 [ [[CONV168]], [[COND_TRUE167]] ], [ [[CONV170]], [[COND_FALSE169]] ] +// SIMD-ONLY0-NEXT: [[CONV173:%.*]] = trunc i32 [[COND172]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV173]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV174:%.*]] = zext i8 [[TMP74]] to i32 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV175:%.*]] = zext i8 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[CMP176:%.*]] = icmp eq i32 [[CONV174]], [[CONV175]] +// SIMD-ONLY0-NEXT: br i1 [[CMP176]], label [[COND_TRUE178:%.*]], label [[COND_FALSE180:%.*]] +// SIMD-ONLY0: cond.true178: +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV179:%.*]] = zext i8 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END182:%.*]] +// SIMD-ONLY0: cond.false180: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] = zext i8 [[TMP77]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END182]] +// SIMD-ONLY0: cond.end182: +// 
SIMD-ONLY0-NEXT: [[COND183:%.*]] = phi i32 [ [[CONV179]], [[COND_TRUE178]] ], [ [[CONV181]], [[COND_FALSE180]] ] +// SIMD-ONLY0-NEXT: [[CONV184:%.*]] = trunc i32 [[COND183]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV184]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV185:%.*]] = zext i8 [[TMP78]] to i32 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV186:%.*]] = zext i8 [[TMP79]] to i32 +// SIMD-ONLY0-NEXT: [[CMP187:%.*]] = icmp eq i32 [[CONV185]], [[CONV186]] +// SIMD-ONLY0-NEXT: br i1 [[CMP187]], label [[IF_THEN189:%.*]], label [[IF_END190:%.*]] +// SIMD-ONLY0: if.then189: +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP80]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END190]] +// SIMD-ONLY0: if.end190: +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV191:%.*]] = zext i8 [[TMP81]] to i32 +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV192:%.*]] = zext i8 [[TMP82]] to i32 +// SIMD-ONLY0-NEXT: [[CMP193:%.*]] = icmp eq i32 [[CONV191]], [[CONV192]] +// SIMD-ONLY0-NEXT: br i1 [[CMP193]], label [[IF_THEN195:%.*]], label [[IF_END196:%.*]] +// SIMD-ONLY0: if.then195: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP83]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END196]] +// SIMD-ONLY0: if.end196: +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV197:%.*]] = sext i8 [[TMP84]] to i32 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV198:%.*]] = sext i8 [[TMP85]] to i32 +// SIMD-ONLY0-NEXT: [[CMP199:%.*]] = icmp sgt i32 [[CONV197]], [[CONV198]] +// SIMD-ONLY0-NEXT: br i1 [[CMP199]], label [[COND_TRUE201:%.*]], label [[COND_FALSE203:%.*]] +// SIMD-ONLY0: 
cond.true201: +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV202:%.*]] = sext i8 [[TMP86]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END205:%.*]] +// SIMD-ONLY0: cond.false203: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = sext i8 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END205]] +// SIMD-ONLY0: cond.end205: +// SIMD-ONLY0-NEXT: [[COND206:%.*]] = phi i32 [ [[CONV202]], [[COND_TRUE201]] ], [ [[CONV204]], [[COND_FALSE203]] ] +// SIMD-ONLY0-NEXT: [[CONV207:%.*]] = trunc i32 [[COND206]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV207]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV208:%.*]] = sext i8 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = sext i8 [[TMP89]] to i32 +// SIMD-ONLY0-NEXT: [[CMP210:%.*]] = icmp slt i32 [[CONV208]], [[CONV209]] +// SIMD-ONLY0-NEXT: br i1 [[CMP210]], label [[COND_TRUE212:%.*]], label [[COND_FALSE214:%.*]] +// SIMD-ONLY0: cond.true212: +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV213:%.*]] = sext i8 [[TMP90]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END216:%.*]] +// SIMD-ONLY0: cond.false214: +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV215:%.*]] = sext i8 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END216]] +// SIMD-ONLY0: cond.end216: +// SIMD-ONLY0-NEXT: [[COND217:%.*]] = phi i32 [ [[CONV213]], [[COND_TRUE212]] ], [ [[CONV215]], [[COND_FALSE214]] ] +// SIMD-ONLY0-NEXT: [[CONV218:%.*]] = trunc i32 [[COND217]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV218]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV219:%.*]] = sext i8 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i8, ptr [[CX]], align 1 
+// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = sext i8 [[TMP93]] to i32 +// SIMD-ONLY0-NEXT: [[CMP221:%.*]] = icmp sgt i32 [[CONV219]], [[CONV220]] +// SIMD-ONLY0-NEXT: br i1 [[CMP221]], label [[COND_TRUE223:%.*]], label [[COND_FALSE225:%.*]] +// SIMD-ONLY0: cond.true223: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV224:%.*]] = sext i8 [[TMP94]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END227:%.*]] +// SIMD-ONLY0: cond.false225: +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = sext i8 [[TMP95]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END227]] +// SIMD-ONLY0: cond.end227: +// SIMD-ONLY0-NEXT: [[COND228:%.*]] = phi i32 [ [[CONV224]], [[COND_TRUE223]] ], [ [[CONV226]], [[COND_FALSE225]] ] +// SIMD-ONLY0-NEXT: [[CONV229:%.*]] = trunc i32 [[COND228]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV229]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV230:%.*]] = sext i8 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV231:%.*]] = sext i8 [[TMP97]] to i32 +// SIMD-ONLY0-NEXT: [[CMP232:%.*]] = icmp slt i32 [[CONV230]], [[CONV231]] +// SIMD-ONLY0-NEXT: br i1 [[CMP232]], label [[COND_TRUE234:%.*]], label [[COND_FALSE236:%.*]] +// SIMD-ONLY0: cond.true234: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV235:%.*]] = sext i8 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END238:%.*]] +// SIMD-ONLY0: cond.false236: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV237:%.*]] = sext i8 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END238]] +// SIMD-ONLY0: cond.end238: +// SIMD-ONLY0-NEXT: [[COND239:%.*]] = phi i32 [ [[CONV235]], [[COND_TRUE234]] ], [ [[CONV237]], [[COND_FALSE236]] ] +// SIMD-ONLY0-NEXT: [[CONV240:%.*]] = trunc i32 [[COND239]] to i8 +// 
SIMD-ONLY0-NEXT: store i8 [[CONV240]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV241:%.*]] = sext i8 [[TMP100]] to i32 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = sext i8 [[TMP101]] to i32 +// SIMD-ONLY0-NEXT: [[CMP243:%.*]] = icmp sgt i32 [[CONV241]], [[CONV242]] +// SIMD-ONLY0-NEXT: br i1 [[CMP243]], label [[IF_THEN245:%.*]], label [[IF_END246:%.*]] +// SIMD-ONLY0: if.then245: +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP102]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END246]] +// SIMD-ONLY0: if.end246: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = sext i8 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV248:%.*]] = sext i8 [[TMP104]] to i32 +// SIMD-ONLY0-NEXT: [[CMP249:%.*]] = icmp slt i32 [[CONV247]], [[CONV248]] +// SIMD-ONLY0-NEXT: br i1 [[CMP249]], label [[IF_THEN251:%.*]], label [[IF_END252:%.*]] +// SIMD-ONLY0: if.then251: +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP105]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END252]] +// SIMD-ONLY0: if.end252: +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = sext i8 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV254:%.*]] = sext i8 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: [[CMP255:%.*]] = icmp sgt i32 [[CONV253]], [[CONV254]] +// SIMD-ONLY0-NEXT: br i1 [[CMP255]], label [[IF_THEN257:%.*]], label [[IF_END258:%.*]] +// SIMD-ONLY0: if.then257: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP108]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label 
[[IF_END258]] +// SIMD-ONLY0: if.end258: +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = sext i8 [[TMP109]] to i32 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = sext i8 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp slt i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[IF_THEN263:%.*]], label [[IF_END264:%.*]] +// SIMD-ONLY0: if.then263: +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP111]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END264]] +// SIMD-ONLY0: if.end264: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV265:%.*]] = sext i8 [[TMP112]] to i32 +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = sext i8 [[TMP113]] to i32 +// SIMD-ONLY0-NEXT: [[CMP267:%.*]] = icmp eq i32 [[CONV265]], [[CONV266]] +// SIMD-ONLY0-NEXT: br i1 [[CMP267]], label [[COND_TRUE269:%.*]], label [[COND_FALSE271:%.*]] +// SIMD-ONLY0: cond.true269: +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV270:%.*]] = sext i8 [[TMP114]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END273:%.*]] +// SIMD-ONLY0: cond.false271: +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV272:%.*]] = sext i8 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END273]] +// SIMD-ONLY0: cond.end273: +// SIMD-ONLY0-NEXT: [[COND274:%.*]] = phi i32 [ [[CONV270]], [[COND_TRUE269]] ], [ [[CONV272]], [[COND_FALSE271]] ] +// SIMD-ONLY0-NEXT: [[CONV275:%.*]] = trunc i32 [[COND274]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV275]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV276:%.*]] = sext i8 [[TMP116]] to i32 +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = 
load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = sext i8 [[TMP117]] to i32 +// SIMD-ONLY0-NEXT: [[CMP278:%.*]] = icmp eq i32 [[CONV276]], [[CONV277]] +// SIMD-ONLY0-NEXT: br i1 [[CMP278]], label [[COND_TRUE280:%.*]], label [[COND_FALSE282:%.*]] +// SIMD-ONLY0: cond.true280: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV281:%.*]] = sext i8 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END284:%.*]] +// SIMD-ONLY0: cond.false282: +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV283:%.*]] = sext i8 [[TMP119]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END284]] +// SIMD-ONLY0: cond.end284: +// SIMD-ONLY0-NEXT: [[COND285:%.*]] = phi i32 [ [[CONV281]], [[COND_TRUE280]] ], [ [[CONV283]], [[COND_FALSE282]] ] +// SIMD-ONLY0-NEXT: [[CONV286:%.*]] = trunc i32 [[COND285]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV286]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV287:%.*]] = sext i8 [[TMP120]] to i32 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV288:%.*]] = sext i8 [[TMP121]] to i32 +// SIMD-ONLY0-NEXT: [[CMP289:%.*]] = icmp eq i32 [[CONV287]], [[CONV288]] +// SIMD-ONLY0-NEXT: br i1 [[CMP289]], label [[IF_THEN291:%.*]], label [[IF_END292:%.*]] +// SIMD-ONLY0: if.then291: +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP122]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END292]] +// SIMD-ONLY0: if.end292: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV293:%.*]] = sext i8 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV294:%.*]] = sext i8 [[TMP124]] to i32 +// SIMD-ONLY0-NEXT: [[CMP295:%.*]] = icmp eq i32 [[CONV293]], [[CONV294]] +// SIMD-ONLY0-NEXT: br i1 [[CMP295]], 
label [[IF_THEN297:%.*]], label [[IF_END298:%.*]] +// SIMD-ONLY0: if.then297: +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP125]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END298]] +// SIMD-ONLY0: if.end298: +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV299:%.*]] = zext i8 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV300:%.*]] = zext i8 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP301:%.*]] = icmp sgt i32 [[CONV299]], [[CONV300]] +// SIMD-ONLY0-NEXT: br i1 [[CMP301]], label [[COND_TRUE303:%.*]], label [[COND_FALSE305:%.*]] +// SIMD-ONLY0: cond.true303: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV304:%.*]] = zext i8 [[TMP128]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END307:%.*]] +// SIMD-ONLY0: cond.false305: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV306:%.*]] = zext i8 [[TMP129]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END307]] +// SIMD-ONLY0: cond.end307: +// SIMD-ONLY0-NEXT: [[COND308:%.*]] = phi i32 [ [[CONV304]], [[COND_TRUE303]] ], [ [[CONV306]], [[COND_FALSE305]] ] +// SIMD-ONLY0-NEXT: [[CONV309:%.*]] = trunc i32 [[COND308]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV309]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV310:%.*]] = zext i8 [[TMP130]] to i32 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV311:%.*]] = zext i8 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: [[CMP312:%.*]] = icmp slt i32 [[CONV310]], [[CONV311]] +// SIMD-ONLY0-NEXT: br i1 [[CMP312]], label [[COND_TRUE314:%.*]], label [[COND_FALSE316:%.*]] +// SIMD-ONLY0: cond.true314: +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = zext i8 
[[TMP132]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END318:%.*]] +// SIMD-ONLY0: cond.false316: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV317:%.*]] = zext i8 [[TMP133]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END318]] +// SIMD-ONLY0: cond.end318: +// SIMD-ONLY0-NEXT: [[COND319:%.*]] = phi i32 [ [[CONV315]], [[COND_TRUE314]] ], [ [[CONV317]], [[COND_FALSE316]] ] +// SIMD-ONLY0-NEXT: [[CONV320:%.*]] = trunc i32 [[COND319]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV320]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = zext i8 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV322:%.*]] = zext i8 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[CMP323:%.*]] = icmp sgt i32 [[CONV321]], [[CONV322]] +// SIMD-ONLY0-NEXT: br i1 [[CMP323]], label [[COND_TRUE325:%.*]], label [[COND_FALSE327:%.*]] +// SIMD-ONLY0: cond.true325: +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV326:%.*]] = zext i8 [[TMP136]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END329:%.*]] +// SIMD-ONLY0: cond.false327: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV328:%.*]] = zext i8 [[TMP137]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END329]] +// SIMD-ONLY0: cond.end329: +// SIMD-ONLY0-NEXT: [[COND330:%.*]] = phi i32 [ [[CONV326]], [[COND_TRUE325]] ], [ [[CONV328]], [[COND_FALSE327]] ] +// SIMD-ONLY0-NEXT: [[CONV331:%.*]] = trunc i32 [[COND330]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV331]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = zext i8 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV333:%.*]] = zext i8 [[TMP139]] to i32 +// SIMD-ONLY0-NEXT: [[CMP334:%.*]] = 
icmp slt i32 [[CONV332]], [[CONV333]] +// SIMD-ONLY0-NEXT: br i1 [[CMP334]], label [[COND_TRUE336:%.*]], label [[COND_FALSE338:%.*]] +// SIMD-ONLY0: cond.true336: +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = zext i8 [[TMP140]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END340:%.*]] +// SIMD-ONLY0: cond.false338: +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV339:%.*]] = zext i8 [[TMP141]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END340]] +// SIMD-ONLY0: cond.end340: +// SIMD-ONLY0-NEXT: [[COND341:%.*]] = phi i32 [ [[CONV337]], [[COND_TRUE336]] ], [ [[CONV339]], [[COND_FALSE338]] ] +// SIMD-ONLY0-NEXT: [[CONV342:%.*]] = trunc i32 [[COND341]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV342]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV343:%.*]] = zext i8 [[TMP142]] to i32 +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV344:%.*]] = zext i8 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: [[CMP345:%.*]] = icmp sgt i32 [[CONV343]], [[CONV344]] +// SIMD-ONLY0-NEXT: br i1 [[CMP345]], label [[IF_THEN347:%.*]], label [[IF_END348:%.*]] +// SIMD-ONLY0: if.then347: +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP144]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END348]] +// SIMD-ONLY0: if.end348: +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV349:%.*]] = zext i8 [[TMP145]] to i32 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV350:%.*]] = zext i8 [[TMP146]] to i32 +// SIMD-ONLY0-NEXT: [[CMP351:%.*]] = icmp slt i32 [[CONV349]], [[CONV350]] +// SIMD-ONLY0-NEXT: br i1 [[CMP351]], label [[IF_THEN353:%.*]], label [[IF_END354:%.*]] +// SIMD-ONLY0: if.then353: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i8, 
ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP147]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END354]] +// SIMD-ONLY0: if.end354: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV355:%.*]] = zext i8 [[TMP148]] to i32 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV356:%.*]] = zext i8 [[TMP149]] to i32 +// SIMD-ONLY0-NEXT: [[CMP357:%.*]] = icmp sgt i32 [[CONV355]], [[CONV356]] +// SIMD-ONLY0-NEXT: br i1 [[CMP357]], label [[IF_THEN359:%.*]], label [[IF_END360:%.*]] +// SIMD-ONLY0: if.then359: +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP150]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END360]] +// SIMD-ONLY0: if.end360: +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV361:%.*]] = zext i8 [[TMP151]] to i32 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV362:%.*]] = zext i8 [[TMP152]] to i32 +// SIMD-ONLY0-NEXT: [[CMP363:%.*]] = icmp slt i32 [[CONV361]], [[CONV362]] +// SIMD-ONLY0-NEXT: br i1 [[CMP363]], label [[IF_THEN365:%.*]], label [[IF_END366:%.*]] +// SIMD-ONLY0: if.then365: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP153]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END366]] +// SIMD-ONLY0: if.end366: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV367:%.*]] = zext i8 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = zext i8 [[TMP155]] to i32 +// SIMD-ONLY0-NEXT: [[CMP369:%.*]] = icmp eq i32 [[CONV367]], [[CONV368]] +// SIMD-ONLY0-NEXT: br i1 [[CMP369]], label [[COND_TRUE371:%.*]], label [[COND_FALSE373:%.*]] +// SIMD-ONLY0: cond.true371: +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i8, ptr 
[[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV372:%.*]] = zext i8 [[TMP156]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END375:%.*]] +// SIMD-ONLY0: cond.false373: +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = zext i8 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END375]] +// SIMD-ONLY0: cond.end375: +// SIMD-ONLY0-NEXT: [[COND376:%.*]] = phi i32 [ [[CONV372]], [[COND_TRUE371]] ], [ [[CONV374]], [[COND_FALSE373]] ] +// SIMD-ONLY0-NEXT: [[CONV377:%.*]] = trunc i32 [[COND376]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV377]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV378:%.*]] = zext i8 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = zext i8 [[TMP159]] to i32 +// SIMD-ONLY0-NEXT: [[CMP380:%.*]] = icmp eq i32 [[CONV378]], [[CONV379]] +// SIMD-ONLY0-NEXT: br i1 [[CMP380]], label [[COND_TRUE382:%.*]], label [[COND_FALSE384:%.*]] +// SIMD-ONLY0: cond.true382: +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV383:%.*]] = zext i8 [[TMP160]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END386:%.*]] +// SIMD-ONLY0: cond.false384: +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = zext i8 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END386]] +// SIMD-ONLY0: cond.end386: +// SIMD-ONLY0-NEXT: [[COND387:%.*]] = phi i32 [ [[CONV383]], [[COND_TRUE382]] ], [ [[CONV385]], [[COND_FALSE384]] ] +// SIMD-ONLY0-NEXT: [[CONV388:%.*]] = trunc i32 [[COND387]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV388]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV389:%.*]] = zext i8 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV390:%.*]] = zext 
i8 [[TMP163]] to i32 +// SIMD-ONLY0-NEXT: [[CMP391:%.*]] = icmp eq i32 [[CONV389]], [[CONV390]] +// SIMD-ONLY0-NEXT: br i1 [[CMP391]], label [[IF_THEN393:%.*]], label [[IF_END394:%.*]] +// SIMD-ONLY0: if.then393: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP164]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END394]] +// SIMD-ONLY0: if.end394: +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV395:%.*]] = zext i8 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV396:%.*]] = zext i8 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP397:%.*]] = icmp eq i32 [[CONV395]], [[CONV396]] +// SIMD-ONLY0-NEXT: br i1 [[CMP397]], label [[IF_THEN399:%.*]], label [[IF_END400:%.*]] +// SIMD-ONLY0: if.then399: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP167]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END400]] +// SIMD-ONLY0: if.end400: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV401:%.*]] = sext i8 [[TMP168]] to i32 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV402:%.*]] = sext i8 [[TMP169]] to i32 +// SIMD-ONLY0-NEXT: [[CMP403:%.*]] = icmp sgt i32 [[CONV401]], [[CONV402]] +// SIMD-ONLY0-NEXT: br i1 [[CMP403]], label [[COND_TRUE405:%.*]], label [[COND_FALSE407:%.*]] +// SIMD-ONLY0: cond.true405: +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV406:%.*]] = sext i8 [[TMP170]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END409:%.*]] +// SIMD-ONLY0: cond.false407: +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV408:%.*]] = sext i8 [[TMP171]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END409]] +// SIMD-ONLY0: cond.end409: +// SIMD-ONLY0-NEXT: [[COND410:%.*]] = 
phi i32 [ [[CONV406]], [[COND_TRUE405]] ], [ [[CONV408]], [[COND_FALSE407]] ] +// SIMD-ONLY0-NEXT: [[CONV411:%.*]] = trunc i32 [[COND410]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV411]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV412:%.*]] = sext i8 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV413:%.*]] = sext i8 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: [[CMP414:%.*]] = icmp slt i32 [[CONV412]], [[CONV413]] +// SIMD-ONLY0-NEXT: br i1 [[CMP414]], label [[COND_TRUE416:%.*]], label [[COND_FALSE418:%.*]] +// SIMD-ONLY0: cond.true416: +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV417:%.*]] = sext i8 [[TMP174]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END420:%.*]] +// SIMD-ONLY0: cond.false418: +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV419:%.*]] = sext i8 [[TMP175]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END420]] +// SIMD-ONLY0: cond.end420: +// SIMD-ONLY0-NEXT: [[COND421:%.*]] = phi i32 [ [[CONV417]], [[COND_TRUE416]] ], [ [[CONV419]], [[COND_FALSE418]] ] +// SIMD-ONLY0-NEXT: [[CONV422:%.*]] = trunc i32 [[COND421]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV422]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV423:%.*]] = sext i8 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV424:%.*]] = sext i8 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: [[CMP425:%.*]] = icmp sgt i32 [[CONV423]], [[CONV424]] +// SIMD-ONLY0-NEXT: br i1 [[CMP425]], label [[COND_TRUE427:%.*]], label [[COND_FALSE429:%.*]] +// SIMD-ONLY0: cond.true427: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV428:%.*]] = sext i8 [[TMP178]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END431:%.*]] +// 
SIMD-ONLY0: cond.false429: +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV430:%.*]] = sext i8 [[TMP179]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END431]] +// SIMD-ONLY0: cond.end431: +// SIMD-ONLY0-NEXT: [[COND432:%.*]] = phi i32 [ [[CONV428]], [[COND_TRUE427]] ], [ [[CONV430]], [[COND_FALSE429]] ] +// SIMD-ONLY0-NEXT: [[CONV433:%.*]] = trunc i32 [[COND432]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV433]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV434:%.*]] = sext i8 [[TMP180]] to i32 +// SIMD-ONLY0-NEXT: [[TMP181:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV435:%.*]] = sext i8 [[TMP181]] to i32 +// SIMD-ONLY0-NEXT: [[CMP436:%.*]] = icmp slt i32 [[CONV434]], [[CONV435]] +// SIMD-ONLY0-NEXT: br i1 [[CMP436]], label [[COND_TRUE438:%.*]], label [[COND_FALSE440:%.*]] +// SIMD-ONLY0: cond.true438: +// SIMD-ONLY0-NEXT: [[TMP182:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV439:%.*]] = sext i8 [[TMP182]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END442:%.*]] +// SIMD-ONLY0: cond.false440: +// SIMD-ONLY0-NEXT: [[TMP183:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV441:%.*]] = sext i8 [[TMP183]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END442]] +// SIMD-ONLY0: cond.end442: +// SIMD-ONLY0-NEXT: [[COND443:%.*]] = phi i32 [ [[CONV439]], [[COND_TRUE438]] ], [ [[CONV441]], [[COND_FALSE440]] ] +// SIMD-ONLY0-NEXT: [[CONV444:%.*]] = trunc i32 [[COND443]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV444]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP184:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV445:%.*]] = sext i8 [[TMP184]] to i32 +// SIMD-ONLY0-NEXT: [[TMP185:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV446:%.*]] = sext i8 [[TMP185]] to i32 +// SIMD-ONLY0-NEXT: [[CMP447:%.*]] = icmp sgt i32 [[CONV445]], [[CONV446]] +// SIMD-ONLY0-NEXT: br i1 [[CMP447]], label 
[[IF_THEN449:%.*]], label [[IF_END450:%.*]] +// SIMD-ONLY0: if.then449: +// SIMD-ONLY0-NEXT: [[TMP186:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP186]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END450]] +// SIMD-ONLY0: if.end450: +// SIMD-ONLY0-NEXT: [[TMP187:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV451:%.*]] = sext i8 [[TMP187]] to i32 +// SIMD-ONLY0-NEXT: [[TMP188:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV452:%.*]] = sext i8 [[TMP188]] to i32 +// SIMD-ONLY0-NEXT: [[CMP453:%.*]] = icmp slt i32 [[CONV451]], [[CONV452]] +// SIMD-ONLY0-NEXT: br i1 [[CMP453]], label [[IF_THEN455:%.*]], label [[IF_END456:%.*]] +// SIMD-ONLY0: if.then455: +// SIMD-ONLY0-NEXT: [[TMP189:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP189]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END456]] +// SIMD-ONLY0: if.end456: +// SIMD-ONLY0-NEXT: [[TMP190:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV457:%.*]] = sext i8 [[TMP190]] to i32 +// SIMD-ONLY0-NEXT: [[TMP191:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV458:%.*]] = sext i8 [[TMP191]] to i32 +// SIMD-ONLY0-NEXT: [[CMP459:%.*]] = icmp sgt i32 [[CONV457]], [[CONV458]] +// SIMD-ONLY0-NEXT: br i1 [[CMP459]], label [[IF_THEN461:%.*]], label [[IF_END462:%.*]] +// SIMD-ONLY0: if.then461: +// SIMD-ONLY0-NEXT: [[TMP192:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP192]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END462]] +// SIMD-ONLY0: if.end462: +// SIMD-ONLY0-NEXT: [[TMP193:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV463:%.*]] = sext i8 [[TMP193]] to i32 +// SIMD-ONLY0-NEXT: [[TMP194:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV464:%.*]] = sext i8 [[TMP194]] to i32 +// SIMD-ONLY0-NEXT: [[CMP465:%.*]] = icmp slt i32 [[CONV463]], [[CONV464]] +// SIMD-ONLY0-NEXT: br i1 [[CMP465]], label [[IF_THEN467:%.*]], label 
[[IF_END468:%.*]] +// SIMD-ONLY0: if.then467: +// SIMD-ONLY0-NEXT: [[TMP195:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP195]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END468]] +// SIMD-ONLY0: if.end468: +// SIMD-ONLY0-NEXT: [[TMP196:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV469:%.*]] = sext i8 [[TMP196]] to i32 +// SIMD-ONLY0-NEXT: [[TMP197:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV470:%.*]] = sext i8 [[TMP197]] to i32 +// SIMD-ONLY0-NEXT: [[CMP471:%.*]] = icmp eq i32 [[CONV469]], [[CONV470]] +// SIMD-ONLY0-NEXT: br i1 [[CMP471]], label [[COND_TRUE473:%.*]], label [[COND_FALSE475:%.*]] +// SIMD-ONLY0: cond.true473: +// SIMD-ONLY0-NEXT: [[TMP198:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV474:%.*]] = sext i8 [[TMP198]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END477:%.*]] +// SIMD-ONLY0: cond.false475: +// SIMD-ONLY0-NEXT: [[TMP199:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV476:%.*]] = sext i8 [[TMP199]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END477]] +// SIMD-ONLY0: cond.end477: +// SIMD-ONLY0-NEXT: [[COND478:%.*]] = phi i32 [ [[CONV474]], [[COND_TRUE473]] ], [ [[CONV476]], [[COND_FALSE475]] ] +// SIMD-ONLY0-NEXT: [[CONV479:%.*]] = trunc i32 [[COND478]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV479]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP200:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV480:%.*]] = sext i8 [[TMP200]] to i32 +// SIMD-ONLY0-NEXT: [[TMP201:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV481:%.*]] = sext i8 [[TMP201]] to i32 +// SIMD-ONLY0-NEXT: [[CMP482:%.*]] = icmp eq i32 [[CONV480]], [[CONV481]] +// SIMD-ONLY0-NEXT: br i1 [[CMP482]], label [[COND_TRUE484:%.*]], label [[COND_FALSE486:%.*]] +// SIMD-ONLY0: cond.true484: +// SIMD-ONLY0-NEXT: [[TMP202:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV485:%.*]] = sext i8 [[TMP202]] to i32 +// SIMD-ONLY0-NEXT: br 
label [[COND_END488:%.*]] +// SIMD-ONLY0: cond.false486: +// SIMD-ONLY0-NEXT: [[TMP203:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV487:%.*]] = sext i8 [[TMP203]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END488]] +// SIMD-ONLY0: cond.end488: +// SIMD-ONLY0-NEXT: [[COND489:%.*]] = phi i32 [ [[CONV485]], [[COND_TRUE484]] ], [ [[CONV487]], [[COND_FALSE486]] ] +// SIMD-ONLY0-NEXT: [[CONV490:%.*]] = trunc i32 [[COND489]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV490]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP204:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV491:%.*]] = sext i8 [[TMP204]] to i32 +// SIMD-ONLY0-NEXT: [[TMP205:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV492:%.*]] = sext i8 [[TMP205]] to i32 +// SIMD-ONLY0-NEXT: [[CMP493:%.*]] = icmp eq i32 [[CONV491]], [[CONV492]] +// SIMD-ONLY0-NEXT: br i1 [[CMP493]], label [[IF_THEN495:%.*]], label [[IF_END496:%.*]] +// SIMD-ONLY0: if.then495: +// SIMD-ONLY0-NEXT: [[TMP206:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP206]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END496]] +// SIMD-ONLY0: if.end496: +// SIMD-ONLY0-NEXT: [[TMP207:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV497:%.*]] = sext i8 [[TMP207]] to i32 +// SIMD-ONLY0-NEXT: [[TMP208:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV498:%.*]] = sext i8 [[TMP208]] to i32 +// SIMD-ONLY0-NEXT: [[CMP499:%.*]] = icmp eq i32 [[CONV497]], [[CONV498]] +// SIMD-ONLY0-NEXT: br i1 [[CMP499]], label [[IF_THEN501:%.*]], label [[IF_END502:%.*]] +// SIMD-ONLY0: if.then501: +// SIMD-ONLY0-NEXT: [[TMP209:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP209]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END502]] +// SIMD-ONLY0: if.end502: +// SIMD-ONLY0-NEXT: [[TMP210:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV503:%.*]] = zext i8 [[TMP210]] to i32 +// SIMD-ONLY0-NEXT: [[TMP211:%.*]] = load 
i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV504:%.*]] = zext i8 [[TMP211]] to i32 +// SIMD-ONLY0-NEXT: [[CMP505:%.*]] = icmp sgt i32 [[CONV503]], [[CONV504]] +// SIMD-ONLY0-NEXT: br i1 [[CMP505]], label [[COND_TRUE507:%.*]], label [[COND_FALSE509:%.*]] +// SIMD-ONLY0: cond.true507: +// SIMD-ONLY0-NEXT: [[TMP212:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV508:%.*]] = zext i8 [[TMP212]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END511:%.*]] +// SIMD-ONLY0: cond.false509: +// SIMD-ONLY0-NEXT: [[TMP213:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV510:%.*]] = zext i8 [[TMP213]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END511]] +// SIMD-ONLY0: cond.end511: +// SIMD-ONLY0-NEXT: [[COND512:%.*]] = phi i32 [ [[CONV508]], [[COND_TRUE507]] ], [ [[CONV510]], [[COND_FALSE509]] ] +// SIMD-ONLY0-NEXT: [[CONV513:%.*]] = trunc i32 [[COND512]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV513]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP214:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV514:%.*]] = zext i8 [[TMP214]] to i32 +// SIMD-ONLY0-NEXT: [[TMP215:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV515:%.*]] = zext i8 [[TMP215]] to i32 +// SIMD-ONLY0-NEXT: [[CMP516:%.*]] = icmp slt i32 [[CONV514]], [[CONV515]] +// SIMD-ONLY0-NEXT: br i1 [[CMP516]], label [[COND_TRUE518:%.*]], label [[COND_FALSE520:%.*]] +// SIMD-ONLY0: cond.true518: +// SIMD-ONLY0-NEXT: [[TMP216:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV519:%.*]] = zext i8 [[TMP216]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END522:%.*]] +// SIMD-ONLY0: cond.false520: +// SIMD-ONLY0-NEXT: [[TMP217:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV521:%.*]] = zext i8 [[TMP217]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END522]] +// SIMD-ONLY0: cond.end522: +// SIMD-ONLY0-NEXT: [[COND523:%.*]] = phi i32 [ [[CONV519]], [[COND_TRUE518]] ], [ [[CONV521]], [[COND_FALSE520]] ] +// SIMD-ONLY0-NEXT: 
[[CONV524:%.*]] = trunc i32 [[COND523]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV524]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP218:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV525:%.*]] = zext i8 [[TMP218]] to i32 +// SIMD-ONLY0-NEXT: [[TMP219:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV526:%.*]] = zext i8 [[TMP219]] to i32 +// SIMD-ONLY0-NEXT: [[CMP527:%.*]] = icmp sgt i32 [[CONV525]], [[CONV526]] +// SIMD-ONLY0-NEXT: br i1 [[CMP527]], label [[COND_TRUE529:%.*]], label [[COND_FALSE531:%.*]] +// SIMD-ONLY0: cond.true529: +// SIMD-ONLY0-NEXT: [[TMP220:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV530:%.*]] = zext i8 [[TMP220]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END533:%.*]] +// SIMD-ONLY0: cond.false531: +// SIMD-ONLY0-NEXT: [[TMP221:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV532:%.*]] = zext i8 [[TMP221]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END533]] +// SIMD-ONLY0: cond.end533: +// SIMD-ONLY0-NEXT: [[COND534:%.*]] = phi i32 [ [[CONV530]], [[COND_TRUE529]] ], [ [[CONV532]], [[COND_FALSE531]] ] +// SIMD-ONLY0-NEXT: [[CONV535:%.*]] = trunc i32 [[COND534]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV535]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP222:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV536:%.*]] = zext i8 [[TMP222]] to i32 +// SIMD-ONLY0-NEXT: [[TMP223:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV537:%.*]] = zext i8 [[TMP223]] to i32 +// SIMD-ONLY0-NEXT: [[CMP538:%.*]] = icmp slt i32 [[CONV536]], [[CONV537]] +// SIMD-ONLY0-NEXT: br i1 [[CMP538]], label [[COND_TRUE540:%.*]], label [[COND_FALSE542:%.*]] +// SIMD-ONLY0: cond.true540: +// SIMD-ONLY0-NEXT: [[TMP224:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV541:%.*]] = zext i8 [[TMP224]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END544:%.*]] +// SIMD-ONLY0: cond.false542: +// SIMD-ONLY0-NEXT: [[TMP225:%.*]] = load i8, ptr [[UCX]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV543:%.*]] = zext i8 [[TMP225]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END544]] +// SIMD-ONLY0: cond.end544: +// SIMD-ONLY0-NEXT: [[COND545:%.*]] = phi i32 [ [[CONV541]], [[COND_TRUE540]] ], [ [[CONV543]], [[COND_FALSE542]] ] +// SIMD-ONLY0-NEXT: [[CONV546:%.*]] = trunc i32 [[COND545]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV546]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP226:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV547:%.*]] = zext i8 [[TMP226]] to i32 +// SIMD-ONLY0-NEXT: [[TMP227:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV548:%.*]] = zext i8 [[TMP227]] to i32 +// SIMD-ONLY0-NEXT: [[CMP549:%.*]] = icmp sgt i32 [[CONV547]], [[CONV548]] +// SIMD-ONLY0-NEXT: br i1 [[CMP549]], label [[IF_THEN551:%.*]], label [[IF_END552:%.*]] +// SIMD-ONLY0: if.then551: +// SIMD-ONLY0-NEXT: [[TMP228:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP228]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END552]] +// SIMD-ONLY0: if.end552: +// SIMD-ONLY0-NEXT: [[TMP229:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV553:%.*]] = zext i8 [[TMP229]] to i32 +// SIMD-ONLY0-NEXT: [[TMP230:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV554:%.*]] = zext i8 [[TMP230]] to i32 +// SIMD-ONLY0-NEXT: [[CMP555:%.*]] = icmp slt i32 [[CONV553]], [[CONV554]] +// SIMD-ONLY0-NEXT: br i1 [[CMP555]], label [[IF_THEN557:%.*]], label [[IF_END558:%.*]] +// SIMD-ONLY0: if.then557: +// SIMD-ONLY0-NEXT: [[TMP231:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP231]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END558]] +// SIMD-ONLY0: if.end558: +// SIMD-ONLY0-NEXT: [[TMP232:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV559:%.*]] = zext i8 [[TMP232]] to i32 +// SIMD-ONLY0-NEXT: [[TMP233:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV560:%.*]] = zext i8 [[TMP233]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP561:%.*]] = icmp sgt i32 [[CONV559]], [[CONV560]] +// SIMD-ONLY0-NEXT: br i1 [[CMP561]], label [[IF_THEN563:%.*]], label [[IF_END564:%.*]] +// SIMD-ONLY0: if.then563: +// SIMD-ONLY0-NEXT: [[TMP234:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP234]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END564]] +// SIMD-ONLY0: if.end564: +// SIMD-ONLY0-NEXT: [[TMP235:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV565:%.*]] = zext i8 [[TMP235]] to i32 +// SIMD-ONLY0-NEXT: [[TMP236:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV566:%.*]] = zext i8 [[TMP236]] to i32 +// SIMD-ONLY0-NEXT: [[CMP567:%.*]] = icmp slt i32 [[CONV565]], [[CONV566]] +// SIMD-ONLY0-NEXT: br i1 [[CMP567]], label [[IF_THEN569:%.*]], label [[IF_END570:%.*]] +// SIMD-ONLY0: if.then569: +// SIMD-ONLY0-NEXT: [[TMP237:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP237]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END570]] +// SIMD-ONLY0: if.end570: +// SIMD-ONLY0-NEXT: [[TMP238:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV571:%.*]] = zext i8 [[TMP238]] to i32 +// SIMD-ONLY0-NEXT: [[TMP239:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV572:%.*]] = zext i8 [[TMP239]] to i32 +// SIMD-ONLY0-NEXT: [[CMP573:%.*]] = icmp eq i32 [[CONV571]], [[CONV572]] +// SIMD-ONLY0-NEXT: br i1 [[CMP573]], label [[COND_TRUE575:%.*]], label [[COND_FALSE577:%.*]] +// SIMD-ONLY0: cond.true575: +// SIMD-ONLY0-NEXT: [[TMP240:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV576:%.*]] = zext i8 [[TMP240]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END579:%.*]] +// SIMD-ONLY0: cond.false577: +// SIMD-ONLY0-NEXT: [[TMP241:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV578:%.*]] = zext i8 [[TMP241]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END579]] +// SIMD-ONLY0: cond.end579: +// SIMD-ONLY0-NEXT: [[COND580:%.*]] = phi i32 [ [[CONV576]], 
[[COND_TRUE575]] ], [ [[CONV578]], [[COND_FALSE577]] ] +// SIMD-ONLY0-NEXT: [[CONV581:%.*]] = trunc i32 [[COND580]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV581]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP242:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV582:%.*]] = zext i8 [[TMP242]] to i32 +// SIMD-ONLY0-NEXT: [[TMP243:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV583:%.*]] = zext i8 [[TMP243]] to i32 +// SIMD-ONLY0-NEXT: [[CMP584:%.*]] = icmp eq i32 [[CONV582]], [[CONV583]] +// SIMD-ONLY0-NEXT: br i1 [[CMP584]], label [[COND_TRUE586:%.*]], label [[COND_FALSE588:%.*]] +// SIMD-ONLY0: cond.true586: +// SIMD-ONLY0-NEXT: [[TMP244:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV587:%.*]] = zext i8 [[TMP244]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END590:%.*]] +// SIMD-ONLY0: cond.false588: +// SIMD-ONLY0-NEXT: [[TMP245:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV589:%.*]] = zext i8 [[TMP245]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END590]] +// SIMD-ONLY0: cond.end590: +// SIMD-ONLY0-NEXT: [[COND591:%.*]] = phi i32 [ [[CONV587]], [[COND_TRUE586]] ], [ [[CONV589]], [[COND_FALSE588]] ] +// SIMD-ONLY0-NEXT: [[CONV592:%.*]] = trunc i32 [[COND591]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV592]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP246:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV593:%.*]] = zext i8 [[TMP246]] to i32 +// SIMD-ONLY0-NEXT: [[TMP247:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV594:%.*]] = zext i8 [[TMP247]] to i32 +// SIMD-ONLY0-NEXT: [[CMP595:%.*]] = icmp eq i32 [[CONV593]], [[CONV594]] +// SIMD-ONLY0-NEXT: br i1 [[CMP595]], label [[IF_THEN597:%.*]], label [[IF_END598:%.*]] +// SIMD-ONLY0: if.then597: +// SIMD-ONLY0-NEXT: [[TMP248:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP248]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END598]] +// SIMD-ONLY0: if.end598: +// 
SIMD-ONLY0-NEXT: [[TMP249:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV599:%.*]] = zext i8 [[TMP249]] to i32 +// SIMD-ONLY0-NEXT: [[TMP250:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV600:%.*]] = zext i8 [[TMP250]] to i32 +// SIMD-ONLY0-NEXT: [[CMP601:%.*]] = icmp eq i32 [[CONV599]], [[CONV600]] +// SIMD-ONLY0-NEXT: br i1 [[CMP601]], label [[IF_THEN603:%.*]], label [[IF_END604:%.*]] +// SIMD-ONLY0: if.then603: +// SIMD-ONLY0-NEXT: [[TMP251:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP251]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END604]] +// SIMD-ONLY0: if.end604: +// SIMD-ONLY0-NEXT: [[TMP252:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV605:%.*]] = sext i8 [[TMP252]] to i32 +// SIMD-ONLY0-NEXT: [[TMP253:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV606:%.*]] = sext i8 [[TMP253]] to i32 +// SIMD-ONLY0-NEXT: [[CMP607:%.*]] = icmp sgt i32 [[CONV605]], [[CONV606]] +// SIMD-ONLY0-NEXT: br i1 [[CMP607]], label [[COND_TRUE609:%.*]], label [[COND_FALSE611:%.*]] +// SIMD-ONLY0: cond.true609: +// SIMD-ONLY0-NEXT: [[TMP254:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV610:%.*]] = sext i8 [[TMP254]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END613:%.*]] +// SIMD-ONLY0: cond.false611: +// SIMD-ONLY0-NEXT: [[TMP255:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV612:%.*]] = sext i8 [[TMP255]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END613]] +// SIMD-ONLY0: cond.end613: +// SIMD-ONLY0-NEXT: [[COND614:%.*]] = phi i32 [ [[CONV610]], [[COND_TRUE609]] ], [ [[CONV612]], [[COND_FALSE611]] ] +// SIMD-ONLY0-NEXT: [[CONV615:%.*]] = trunc i32 [[COND614]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV615]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP256:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV616:%.*]] = sext i8 [[TMP256]] to i32 +// SIMD-ONLY0-NEXT: [[TMP257:%.*]] = load i8, ptr [[CE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV617:%.*]] = sext i8 [[TMP257]] to i32 +// SIMD-ONLY0-NEXT: [[CMP618:%.*]] = icmp slt i32 [[CONV616]], [[CONV617]] +// SIMD-ONLY0-NEXT: br i1 [[CMP618]], label [[COND_TRUE620:%.*]], label [[COND_FALSE622:%.*]] +// SIMD-ONLY0: cond.true620: +// SIMD-ONLY0-NEXT: [[TMP258:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV621:%.*]] = sext i8 [[TMP258]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END624:%.*]] +// SIMD-ONLY0: cond.false622: +// SIMD-ONLY0-NEXT: [[TMP259:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV623:%.*]] = sext i8 [[TMP259]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END624]] +// SIMD-ONLY0: cond.end624: +// SIMD-ONLY0-NEXT: [[COND625:%.*]] = phi i32 [ [[CONV621]], [[COND_TRUE620]] ], [ [[CONV623]], [[COND_FALSE622]] ] +// SIMD-ONLY0-NEXT: [[CONV626:%.*]] = trunc i32 [[COND625]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV626]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP260:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV627:%.*]] = sext i8 [[TMP260]] to i32 +// SIMD-ONLY0-NEXT: [[TMP261:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV628:%.*]] = sext i8 [[TMP261]] to i32 +// SIMD-ONLY0-NEXT: [[CMP629:%.*]] = icmp sgt i32 [[CONV627]], [[CONV628]] +// SIMD-ONLY0-NEXT: br i1 [[CMP629]], label [[COND_TRUE631:%.*]], label [[COND_FALSE633:%.*]] +// SIMD-ONLY0: cond.true631: +// SIMD-ONLY0-NEXT: [[TMP262:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV632:%.*]] = sext i8 [[TMP262]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END635:%.*]] +// SIMD-ONLY0: cond.false633: +// SIMD-ONLY0-NEXT: [[TMP263:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV634:%.*]] = sext i8 [[TMP263]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END635]] +// SIMD-ONLY0: cond.end635: +// SIMD-ONLY0-NEXT: [[COND636:%.*]] = phi i32 [ [[CONV632]], [[COND_TRUE631]] ], [ [[CONV634]], [[COND_FALSE633]] ] +// SIMD-ONLY0-NEXT: [[CONV637:%.*]] = trunc i32 [[COND636]] to i8 
+// SIMD-ONLY0-NEXT: store i8 [[CONV637]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP264:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV638:%.*]] = sext i8 [[TMP264]] to i32 +// SIMD-ONLY0-NEXT: [[TMP265:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV639:%.*]] = sext i8 [[TMP265]] to i32 +// SIMD-ONLY0-NEXT: [[CMP640:%.*]] = icmp slt i32 [[CONV638]], [[CONV639]] +// SIMD-ONLY0-NEXT: br i1 [[CMP640]], label [[COND_TRUE642:%.*]], label [[COND_FALSE644:%.*]] +// SIMD-ONLY0: cond.true642: +// SIMD-ONLY0-NEXT: [[TMP266:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV643:%.*]] = sext i8 [[TMP266]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END646:%.*]] +// SIMD-ONLY0: cond.false644: +// SIMD-ONLY0-NEXT: [[TMP267:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV645:%.*]] = sext i8 [[TMP267]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END646]] +// SIMD-ONLY0: cond.end646: +// SIMD-ONLY0-NEXT: [[COND647:%.*]] = phi i32 [ [[CONV643]], [[COND_TRUE642]] ], [ [[CONV645]], [[COND_FALSE644]] ] +// SIMD-ONLY0-NEXT: [[CONV648:%.*]] = trunc i32 [[COND647]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV648]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP268:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV649:%.*]] = sext i8 [[TMP268]] to i32 +// SIMD-ONLY0-NEXT: [[TMP269:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV650:%.*]] = sext i8 [[TMP269]] to i32 +// SIMD-ONLY0-NEXT: [[CMP651:%.*]] = icmp sgt i32 [[CONV649]], [[CONV650]] +// SIMD-ONLY0-NEXT: br i1 [[CMP651]], label [[IF_THEN653:%.*]], label [[IF_END654:%.*]] +// SIMD-ONLY0: if.then653: +// SIMD-ONLY0-NEXT: [[TMP270:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP270]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END654]] +// SIMD-ONLY0: if.end654: +// SIMD-ONLY0-NEXT: [[TMP271:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV655:%.*]] = sext i8 [[TMP271]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP272:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV656:%.*]] = sext i8 [[TMP272]] to i32 +// SIMD-ONLY0-NEXT: [[CMP657:%.*]] = icmp slt i32 [[CONV655]], [[CONV656]] +// SIMD-ONLY0-NEXT: br i1 [[CMP657]], label [[IF_THEN659:%.*]], label [[IF_END660:%.*]] +// SIMD-ONLY0: if.then659: +// SIMD-ONLY0-NEXT: [[TMP273:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP273]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END660]] +// SIMD-ONLY0: if.end660: +// SIMD-ONLY0-NEXT: [[TMP274:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV661:%.*]] = sext i8 [[TMP274]] to i32 +// SIMD-ONLY0-NEXT: [[TMP275:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV662:%.*]] = sext i8 [[TMP275]] to i32 +// SIMD-ONLY0-NEXT: [[CMP663:%.*]] = icmp sgt i32 [[CONV661]], [[CONV662]] +// SIMD-ONLY0-NEXT: br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END666:%.*]] +// SIMD-ONLY0: if.then665: +// SIMD-ONLY0-NEXT: [[TMP276:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP276]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END666]] +// SIMD-ONLY0: if.end666: +// SIMD-ONLY0-NEXT: [[TMP277:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV667:%.*]] = sext i8 [[TMP277]] to i32 +// SIMD-ONLY0-NEXT: [[TMP278:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV668:%.*]] = sext i8 [[TMP278]] to i32 +// SIMD-ONLY0-NEXT: [[CMP669:%.*]] = icmp slt i32 [[CONV667]], [[CONV668]] +// SIMD-ONLY0-NEXT: br i1 [[CMP669]], label [[IF_THEN671:%.*]], label [[IF_END672:%.*]] +// SIMD-ONLY0: if.then671: +// SIMD-ONLY0-NEXT: [[TMP279:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP279]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END672]] +// SIMD-ONLY0: if.end672: +// SIMD-ONLY0-NEXT: [[TMP280:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV673:%.*]] = sext i8 [[TMP280]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP281:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV674:%.*]] = sext i8 [[TMP281]] to i32 +// SIMD-ONLY0-NEXT: [[CMP675:%.*]] = icmp eq i32 [[CONV673]], [[CONV674]] +// SIMD-ONLY0-NEXT: br i1 [[CMP675]], label [[COND_TRUE677:%.*]], label [[COND_FALSE679:%.*]] +// SIMD-ONLY0: cond.true677: +// SIMD-ONLY0-NEXT: [[TMP282:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV678:%.*]] = sext i8 [[TMP282]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END681:%.*]] +// SIMD-ONLY0: cond.false679: +// SIMD-ONLY0-NEXT: [[TMP283:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV680:%.*]] = sext i8 [[TMP283]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END681]] +// SIMD-ONLY0: cond.end681: +// SIMD-ONLY0-NEXT: [[COND682:%.*]] = phi i32 [ [[CONV678]], [[COND_TRUE677]] ], [ [[CONV680]], [[COND_FALSE679]] ] +// SIMD-ONLY0-NEXT: [[CONV683:%.*]] = trunc i32 [[COND682]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV683]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP284:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV684:%.*]] = sext i8 [[TMP284]] to i32 +// SIMD-ONLY0-NEXT: [[TMP285:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV685:%.*]] = sext i8 [[TMP285]] to i32 +// SIMD-ONLY0-NEXT: [[CMP686:%.*]] = icmp eq i32 [[CONV684]], [[CONV685]] +// SIMD-ONLY0-NEXT: br i1 [[CMP686]], label [[COND_TRUE688:%.*]], label [[COND_FALSE690:%.*]] +// SIMD-ONLY0: cond.true688: +// SIMD-ONLY0-NEXT: [[TMP286:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV689:%.*]] = sext i8 [[TMP286]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END692:%.*]] +// SIMD-ONLY0: cond.false690: +// SIMD-ONLY0-NEXT: [[TMP287:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV691:%.*]] = sext i8 [[TMP287]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END692]] +// SIMD-ONLY0: cond.end692: +// SIMD-ONLY0-NEXT: [[COND693:%.*]] = phi i32 [ [[CONV689]], [[COND_TRUE688]] ], [ [[CONV691]], [[COND_FALSE690]] ] +// SIMD-ONLY0-NEXT: 
[[CONV694:%.*]] = trunc i32 [[COND693]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV694]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP288:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV695:%.*]] = sext i8 [[TMP288]] to i32 +// SIMD-ONLY0-NEXT: [[TMP289:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV696:%.*]] = sext i8 [[TMP289]] to i32 +// SIMD-ONLY0-NEXT: [[CMP697:%.*]] = icmp eq i32 [[CONV695]], [[CONV696]] +// SIMD-ONLY0-NEXT: br i1 [[CMP697]], label [[IF_THEN699:%.*]], label [[IF_END700:%.*]] +// SIMD-ONLY0: if.then699: +// SIMD-ONLY0-NEXT: [[TMP290:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP290]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END700]] +// SIMD-ONLY0: if.end700: +// SIMD-ONLY0-NEXT: [[TMP291:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV701:%.*]] = sext i8 [[TMP291]] to i32 +// SIMD-ONLY0-NEXT: [[TMP292:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV702:%.*]] = sext i8 [[TMP292]] to i32 +// SIMD-ONLY0-NEXT: [[CMP703:%.*]] = icmp eq i32 [[CONV701]], [[CONV702]] +// SIMD-ONLY0-NEXT: br i1 [[CMP703]], label [[IF_THEN705:%.*]], label [[IF_END706:%.*]] +// SIMD-ONLY0: if.then705: +// SIMD-ONLY0-NEXT: [[TMP293:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP293]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END706]] +// SIMD-ONLY0: if.end706: +// SIMD-ONLY0-NEXT: [[TMP294:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV707:%.*]] = zext i8 [[TMP294]] to i32 +// SIMD-ONLY0-NEXT: [[TMP295:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV708:%.*]] = zext i8 [[TMP295]] to i32 +// SIMD-ONLY0-NEXT: [[CMP709:%.*]] = icmp sgt i32 [[CONV707]], [[CONV708]] +// SIMD-ONLY0-NEXT: br i1 [[CMP709]], label [[COND_TRUE711:%.*]], label [[COND_FALSE713:%.*]] +// SIMD-ONLY0: cond.true711: +// SIMD-ONLY0-NEXT: [[TMP296:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV712:%.*]] = zext 
i8 [[TMP296]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END715:%.*]] +// SIMD-ONLY0: cond.false713: +// SIMD-ONLY0-NEXT: [[TMP297:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV714:%.*]] = zext i8 [[TMP297]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END715]] +// SIMD-ONLY0: cond.end715: +// SIMD-ONLY0-NEXT: [[COND716:%.*]] = phi i32 [ [[CONV712]], [[COND_TRUE711]] ], [ [[CONV714]], [[COND_FALSE713]] ] +// SIMD-ONLY0-NEXT: [[CONV717:%.*]] = trunc i32 [[COND716]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV717]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP298:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV718:%.*]] = zext i8 [[TMP298]] to i32 +// SIMD-ONLY0-NEXT: [[TMP299:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV719:%.*]] = zext i8 [[TMP299]] to i32 +// SIMD-ONLY0-NEXT: [[CMP720:%.*]] = icmp slt i32 [[CONV718]], [[CONV719]] +// SIMD-ONLY0-NEXT: br i1 [[CMP720]], label [[COND_TRUE722:%.*]], label [[COND_FALSE724:%.*]] +// SIMD-ONLY0: cond.true722: +// SIMD-ONLY0-NEXT: [[TMP300:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV723:%.*]] = zext i8 [[TMP300]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END726:%.*]] +// SIMD-ONLY0: cond.false724: +// SIMD-ONLY0-NEXT: [[TMP301:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV725:%.*]] = zext i8 [[TMP301]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END726]] +// SIMD-ONLY0: cond.end726: +// SIMD-ONLY0-NEXT: [[COND727:%.*]] = phi i32 [ [[CONV723]], [[COND_TRUE722]] ], [ [[CONV725]], [[COND_FALSE724]] ] +// SIMD-ONLY0-NEXT: [[CONV728:%.*]] = trunc i32 [[COND727]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV728]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP302:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV729:%.*]] = zext i8 [[TMP302]] to i32 +// SIMD-ONLY0-NEXT: [[TMP303:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV730:%.*]] = zext i8 [[TMP303]] to i32 +// SIMD-ONLY0-NEXT: [[CMP731:%.*]] = 
icmp sgt i32 [[CONV729]], [[CONV730]] +// SIMD-ONLY0-NEXT: br i1 [[CMP731]], label [[COND_TRUE733:%.*]], label [[COND_FALSE735:%.*]] +// SIMD-ONLY0: cond.true733: +// SIMD-ONLY0-NEXT: [[TMP304:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV734:%.*]] = zext i8 [[TMP304]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END737:%.*]] +// SIMD-ONLY0: cond.false735: +// SIMD-ONLY0-NEXT: [[TMP305:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV736:%.*]] = zext i8 [[TMP305]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END737]] +// SIMD-ONLY0: cond.end737: +// SIMD-ONLY0-NEXT: [[COND738:%.*]] = phi i32 [ [[CONV734]], [[COND_TRUE733]] ], [ [[CONV736]], [[COND_FALSE735]] ] +// SIMD-ONLY0-NEXT: [[CONV739:%.*]] = trunc i32 [[COND738]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV739]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP306:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV740:%.*]] = zext i8 [[TMP306]] to i32 +// SIMD-ONLY0-NEXT: [[TMP307:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV741:%.*]] = zext i8 [[TMP307]] to i32 +// SIMD-ONLY0-NEXT: [[CMP742:%.*]] = icmp slt i32 [[CONV740]], [[CONV741]] +// SIMD-ONLY0-NEXT: br i1 [[CMP742]], label [[COND_TRUE744:%.*]], label [[COND_FALSE746:%.*]] +// SIMD-ONLY0: cond.true744: +// SIMD-ONLY0-NEXT: [[TMP308:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV745:%.*]] = zext i8 [[TMP308]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END748:%.*]] +// SIMD-ONLY0: cond.false746: +// SIMD-ONLY0-NEXT: [[TMP309:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV747:%.*]] = zext i8 [[TMP309]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END748]] +// SIMD-ONLY0: cond.end748: +// SIMD-ONLY0-NEXT: [[COND749:%.*]] = phi i32 [ [[CONV745]], [[COND_TRUE744]] ], [ [[CONV747]], [[COND_FALSE746]] ] +// SIMD-ONLY0-NEXT: [[CONV750:%.*]] = trunc i32 [[COND749]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV750]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: 
[[TMP310:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV751:%.*]] = zext i8 [[TMP310]] to i32 +// SIMD-ONLY0-NEXT: [[TMP311:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV752:%.*]] = zext i8 [[TMP311]] to i32 +// SIMD-ONLY0-NEXT: [[CMP753:%.*]] = icmp sgt i32 [[CONV751]], [[CONV752]] +// SIMD-ONLY0-NEXT: br i1 [[CMP753]], label [[IF_THEN755:%.*]], label [[IF_END756:%.*]] +// SIMD-ONLY0: if.then755: +// SIMD-ONLY0-NEXT: [[TMP312:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP312]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END756]] +// SIMD-ONLY0: if.end756: +// SIMD-ONLY0-NEXT: [[TMP313:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV757:%.*]] = zext i8 [[TMP313]] to i32 +// SIMD-ONLY0-NEXT: [[TMP314:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV758:%.*]] = zext i8 [[TMP314]] to i32 +// SIMD-ONLY0-NEXT: [[CMP759:%.*]] = icmp slt i32 [[CONV757]], [[CONV758]] +// SIMD-ONLY0-NEXT: br i1 [[CMP759]], label [[IF_THEN761:%.*]], label [[IF_END762:%.*]] +// SIMD-ONLY0: if.then761: +// SIMD-ONLY0-NEXT: [[TMP315:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP315]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END762]] +// SIMD-ONLY0: if.end762: +// SIMD-ONLY0-NEXT: [[TMP316:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV763:%.*]] = zext i8 [[TMP316]] to i32 +// SIMD-ONLY0-NEXT: [[TMP317:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV764:%.*]] = zext i8 [[TMP317]] to i32 +// SIMD-ONLY0-NEXT: [[CMP765:%.*]] = icmp sgt i32 [[CONV763]], [[CONV764]] +// SIMD-ONLY0-NEXT: br i1 [[CMP765]], label [[IF_THEN767:%.*]], label [[IF_END768:%.*]] +// SIMD-ONLY0: if.then767: +// SIMD-ONLY0-NEXT: [[TMP318:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP318]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END768]] +// SIMD-ONLY0: if.end768: +// SIMD-ONLY0-NEXT: [[TMP319:%.*]] = 
load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV769:%.*]] = zext i8 [[TMP319]] to i32 +// SIMD-ONLY0-NEXT: [[TMP320:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV770:%.*]] = zext i8 [[TMP320]] to i32 +// SIMD-ONLY0-NEXT: [[CMP771:%.*]] = icmp slt i32 [[CONV769]], [[CONV770]] +// SIMD-ONLY0-NEXT: br i1 [[CMP771]], label [[IF_THEN773:%.*]], label [[IF_END774:%.*]] +// SIMD-ONLY0: if.then773: +// SIMD-ONLY0-NEXT: [[TMP321:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP321]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END774]] +// SIMD-ONLY0: if.end774: +// SIMD-ONLY0-NEXT: [[TMP322:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV775:%.*]] = zext i8 [[TMP322]] to i32 +// SIMD-ONLY0-NEXT: [[TMP323:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV776:%.*]] = zext i8 [[TMP323]] to i32 +// SIMD-ONLY0-NEXT: [[CMP777:%.*]] = icmp eq i32 [[CONV775]], [[CONV776]] +// SIMD-ONLY0-NEXT: br i1 [[CMP777]], label [[COND_TRUE779:%.*]], label [[COND_FALSE781:%.*]] +// SIMD-ONLY0: cond.true779: +// SIMD-ONLY0-NEXT: [[TMP324:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV780:%.*]] = zext i8 [[TMP324]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END783:%.*]] +// SIMD-ONLY0: cond.false781: +// SIMD-ONLY0-NEXT: [[TMP325:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV782:%.*]] = zext i8 [[TMP325]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END783]] +// SIMD-ONLY0: cond.end783: +// SIMD-ONLY0-NEXT: [[COND784:%.*]] = phi i32 [ [[CONV780]], [[COND_TRUE779]] ], [ [[CONV782]], [[COND_FALSE781]] ] +// SIMD-ONLY0-NEXT: [[CONV785:%.*]] = trunc i32 [[COND784]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV785]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP326:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV786:%.*]] = zext i8 [[TMP326]] to i32 +// SIMD-ONLY0-NEXT: [[TMP327:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV787:%.*]] = 
zext i8 [[TMP327]] to i32 +// SIMD-ONLY0-NEXT: [[CMP788:%.*]] = icmp eq i32 [[CONV786]], [[CONV787]] +// SIMD-ONLY0-NEXT: br i1 [[CMP788]], label [[COND_TRUE790:%.*]], label [[COND_FALSE792:%.*]] +// SIMD-ONLY0: cond.true790: +// SIMD-ONLY0-NEXT: [[TMP328:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV791:%.*]] = zext i8 [[TMP328]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END794:%.*]] +// SIMD-ONLY0: cond.false792: +// SIMD-ONLY0-NEXT: [[TMP329:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV793:%.*]] = zext i8 [[TMP329]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END794]] +// SIMD-ONLY0: cond.end794: +// SIMD-ONLY0-NEXT: [[COND795:%.*]] = phi i32 [ [[CONV791]], [[COND_TRUE790]] ], [ [[CONV793]], [[COND_FALSE792]] ] +// SIMD-ONLY0-NEXT: [[CONV796:%.*]] = trunc i32 [[COND795]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV796]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP330:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV797:%.*]] = zext i8 [[TMP330]] to i32 +// SIMD-ONLY0-NEXT: [[TMP331:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV798:%.*]] = zext i8 [[TMP331]] to i32 +// SIMD-ONLY0-NEXT: [[CMP799:%.*]] = icmp eq i32 [[CONV797]], [[CONV798]] +// SIMD-ONLY0-NEXT: br i1 [[CMP799]], label [[IF_THEN801:%.*]], label [[IF_END802:%.*]] +// SIMD-ONLY0: if.then801: +// SIMD-ONLY0-NEXT: [[TMP332:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP332]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END802]] +// SIMD-ONLY0: if.end802: +// SIMD-ONLY0-NEXT: [[TMP333:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV803:%.*]] = zext i8 [[TMP333]] to i32 +// SIMD-ONLY0-NEXT: [[TMP334:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV804:%.*]] = zext i8 [[TMP334]] to i32 +// SIMD-ONLY0-NEXT: [[CMP805:%.*]] = icmp eq i32 [[CONV803]], [[CONV804]] +// SIMD-ONLY0-NEXT: br i1 [[CMP805]], label [[IF_THEN807:%.*]], label [[IF_END808:%.*]] +// 
SIMD-ONLY0: if.then807: +// SIMD-ONLY0-NEXT: [[TMP335:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP335]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END808]] +// SIMD-ONLY0: if.end808: +// SIMD-ONLY0-NEXT: [[TMP336:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV809:%.*]] = sext i8 [[TMP336]] to i32 +// SIMD-ONLY0-NEXT: [[TMP337:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV810:%.*]] = sext i8 [[TMP337]] to i32 +// SIMD-ONLY0-NEXT: [[CMP811:%.*]] = icmp sgt i32 [[CONV809]], [[CONV810]] +// SIMD-ONLY0-NEXT: br i1 [[CMP811]], label [[COND_TRUE813:%.*]], label [[COND_FALSE815:%.*]] +// SIMD-ONLY0: cond.true813: +// SIMD-ONLY0-NEXT: [[TMP338:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV814:%.*]] = sext i8 [[TMP338]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END817:%.*]] +// SIMD-ONLY0: cond.false815: +// SIMD-ONLY0-NEXT: [[TMP339:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV816:%.*]] = sext i8 [[TMP339]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END817]] +// SIMD-ONLY0: cond.end817: +// SIMD-ONLY0-NEXT: [[COND818:%.*]] = phi i32 [ [[CONV814]], [[COND_TRUE813]] ], [ [[CONV816]], [[COND_FALSE815]] ] +// SIMD-ONLY0-NEXT: [[CONV819:%.*]] = trunc i32 [[COND818]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV819]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP340:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV820:%.*]] = sext i8 [[TMP340]] to i32 +// SIMD-ONLY0-NEXT: [[TMP341:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV821:%.*]] = sext i8 [[TMP341]] to i32 +// SIMD-ONLY0-NEXT: [[CMP822:%.*]] = icmp slt i32 [[CONV820]], [[CONV821]] +// SIMD-ONLY0-NEXT: br i1 [[CMP822]], label [[COND_TRUE824:%.*]], label [[COND_FALSE826:%.*]] +// SIMD-ONLY0: cond.true824: +// SIMD-ONLY0-NEXT: [[TMP342:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV825:%.*]] = sext i8 [[TMP342]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END828:%.*]] +// SIMD-ONLY0: cond.false826: +// SIMD-ONLY0-NEXT: [[TMP343:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV827:%.*]] = sext i8 [[TMP343]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END828]] +// SIMD-ONLY0: cond.end828: +// SIMD-ONLY0-NEXT: [[COND829:%.*]] = phi i32 [ [[CONV825]], [[COND_TRUE824]] ], [ [[CONV827]], [[COND_FALSE826]] ] +// SIMD-ONLY0-NEXT: [[CONV830:%.*]] = trunc i32 [[COND829]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV830]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP344:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV831:%.*]] = sext i8 [[TMP344]] to i32 +// SIMD-ONLY0-NEXT: [[TMP345:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV832:%.*]] = sext i8 [[TMP345]] to i32 +// SIMD-ONLY0-NEXT: [[CMP833:%.*]] = icmp sgt i32 [[CONV831]], [[CONV832]] +// SIMD-ONLY0-NEXT: br i1 [[CMP833]], label [[COND_TRUE835:%.*]], label [[COND_FALSE837:%.*]] +// SIMD-ONLY0: cond.true835: +// SIMD-ONLY0-NEXT: [[TMP346:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV836:%.*]] = sext i8 [[TMP346]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END839:%.*]] +// SIMD-ONLY0: cond.false837: +// SIMD-ONLY0-NEXT: [[TMP347:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV838:%.*]] = sext i8 [[TMP347]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END839]] +// SIMD-ONLY0: cond.end839: +// SIMD-ONLY0-NEXT: [[COND840:%.*]] = phi i32 [ [[CONV836]], [[COND_TRUE835]] ], [ [[CONV838]], [[COND_FALSE837]] ] +// SIMD-ONLY0-NEXT: [[CONV841:%.*]] = trunc i32 [[COND840]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV841]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP348:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV842:%.*]] = sext i8 [[TMP348]] to i32 +// SIMD-ONLY0-NEXT: [[TMP349:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV843:%.*]] = sext i8 [[TMP349]] to i32 +// SIMD-ONLY0-NEXT: [[CMP844:%.*]] = icmp slt i32 [[CONV842]], [[CONV843]] +// SIMD-ONLY0-NEXT: 
br i1 [[CMP844]], label [[COND_TRUE846:%.*]], label [[COND_FALSE848:%.*]] +// SIMD-ONLY0: cond.true846: +// SIMD-ONLY0-NEXT: [[TMP350:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV847:%.*]] = sext i8 [[TMP350]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END850:%.*]] +// SIMD-ONLY0: cond.false848: +// SIMD-ONLY0-NEXT: [[TMP351:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV849:%.*]] = sext i8 [[TMP351]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END850]] +// SIMD-ONLY0: cond.end850: +// SIMD-ONLY0-NEXT: [[COND851:%.*]] = phi i32 [ [[CONV847]], [[COND_TRUE846]] ], [ [[CONV849]], [[COND_FALSE848]] ] +// SIMD-ONLY0-NEXT: [[CONV852:%.*]] = trunc i32 [[COND851]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV852]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP352:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV853:%.*]] = sext i8 [[TMP352]] to i32 +// SIMD-ONLY0-NEXT: [[TMP353:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV854:%.*]] = sext i8 [[TMP353]] to i32 +// SIMD-ONLY0-NEXT: [[CMP855:%.*]] = icmp sgt i32 [[CONV853]], [[CONV854]] +// SIMD-ONLY0-NEXT: br i1 [[CMP855]], label [[IF_THEN857:%.*]], label [[IF_END858:%.*]] +// SIMD-ONLY0: if.then857: +// SIMD-ONLY0-NEXT: [[TMP354:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP354]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END858]] +// SIMD-ONLY0: if.end858: +// SIMD-ONLY0-NEXT: [[TMP355:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV859:%.*]] = sext i8 [[TMP355]] to i32 +// SIMD-ONLY0-NEXT: [[TMP356:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV860:%.*]] = sext i8 [[TMP356]] to i32 +// SIMD-ONLY0-NEXT: [[CMP861:%.*]] = icmp slt i32 [[CONV859]], [[CONV860]] +// SIMD-ONLY0-NEXT: br i1 [[CMP861]], label [[IF_THEN863:%.*]], label [[IF_END864:%.*]] +// SIMD-ONLY0: if.then863: +// SIMD-ONLY0-NEXT: [[TMP357:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP357]], ptr 
[[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END864]] +// SIMD-ONLY0: if.end864: +// SIMD-ONLY0-NEXT: [[TMP358:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV865:%.*]] = sext i8 [[TMP358]] to i32 +// SIMD-ONLY0-NEXT: [[TMP359:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV866:%.*]] = sext i8 [[TMP359]] to i32 +// SIMD-ONLY0-NEXT: [[CMP867:%.*]] = icmp sgt i32 [[CONV865]], [[CONV866]] +// SIMD-ONLY0-NEXT: br i1 [[CMP867]], label [[IF_THEN869:%.*]], label [[IF_END870:%.*]] +// SIMD-ONLY0: if.then869: +// SIMD-ONLY0-NEXT: [[TMP360:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP360]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END870]] +// SIMD-ONLY0: if.end870: +// SIMD-ONLY0-NEXT: [[TMP361:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV871:%.*]] = sext i8 [[TMP361]] to i32 +// SIMD-ONLY0-NEXT: [[TMP362:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV872:%.*]] = sext i8 [[TMP362]] to i32 +// SIMD-ONLY0-NEXT: [[CMP873:%.*]] = icmp slt i32 [[CONV871]], [[CONV872]] +// SIMD-ONLY0-NEXT: br i1 [[CMP873]], label [[IF_THEN875:%.*]], label [[IF_END876:%.*]] +// SIMD-ONLY0: if.then875: +// SIMD-ONLY0-NEXT: [[TMP363:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP363]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END876]] +// SIMD-ONLY0: if.end876: +// SIMD-ONLY0-NEXT: [[TMP364:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV877:%.*]] = sext i8 [[TMP364]] to i32 +// SIMD-ONLY0-NEXT: [[TMP365:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV878:%.*]] = sext i8 [[TMP365]] to i32 +// SIMD-ONLY0-NEXT: [[CMP879:%.*]] = icmp eq i32 [[CONV877]], [[CONV878]] +// SIMD-ONLY0-NEXT: br i1 [[CMP879]], label [[COND_TRUE881:%.*]], label [[COND_FALSE883:%.*]] +// SIMD-ONLY0: cond.true881: +// SIMD-ONLY0-NEXT: [[TMP366:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV882:%.*]] = sext i8 [[TMP366]] to i32 
+// SIMD-ONLY0-NEXT: br label [[COND_END885:%.*]] +// SIMD-ONLY0: cond.false883: +// SIMD-ONLY0-NEXT: [[TMP367:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV884:%.*]] = sext i8 [[TMP367]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END885]] +// SIMD-ONLY0: cond.end885: +// SIMD-ONLY0-NEXT: [[COND886:%.*]] = phi i32 [ [[CONV882]], [[COND_TRUE881]] ], [ [[CONV884]], [[COND_FALSE883]] ] +// SIMD-ONLY0-NEXT: [[CONV887:%.*]] = trunc i32 [[COND886]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV887]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP368:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV888:%.*]] = sext i8 [[TMP368]] to i32 +// SIMD-ONLY0-NEXT: [[TMP369:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV889:%.*]] = sext i8 [[TMP369]] to i32 +// SIMD-ONLY0-NEXT: [[CMP890:%.*]] = icmp eq i32 [[CONV888]], [[CONV889]] +// SIMD-ONLY0-NEXT: br i1 [[CMP890]], label [[COND_TRUE892:%.*]], label [[COND_FALSE894:%.*]] +// SIMD-ONLY0: cond.true892: +// SIMD-ONLY0-NEXT: [[TMP370:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV893:%.*]] = sext i8 [[TMP370]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END896:%.*]] +// SIMD-ONLY0: cond.false894: +// SIMD-ONLY0-NEXT: [[TMP371:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV895:%.*]] = sext i8 [[TMP371]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END896]] +// SIMD-ONLY0: cond.end896: +// SIMD-ONLY0-NEXT: [[COND897:%.*]] = phi i32 [ [[CONV893]], [[COND_TRUE892]] ], [ [[CONV895]], [[COND_FALSE894]] ] +// SIMD-ONLY0-NEXT: [[CONV898:%.*]] = trunc i32 [[COND897]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV898]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP372:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV899:%.*]] = sext i8 [[TMP372]] to i32 +// SIMD-ONLY0-NEXT: [[TMP373:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV900:%.*]] = sext i8 [[TMP373]] to i32 +// SIMD-ONLY0-NEXT: [[CMP901:%.*]] = icmp eq i32 [[CONV899]], 
[[CONV900]] +// SIMD-ONLY0-NEXT: br i1 [[CMP901]], label [[IF_THEN903:%.*]], label [[IF_END904:%.*]] +// SIMD-ONLY0: if.then903: +// SIMD-ONLY0-NEXT: [[TMP374:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP374]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END904]] +// SIMD-ONLY0: if.end904: +// SIMD-ONLY0-NEXT: [[TMP375:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV905:%.*]] = sext i8 [[TMP375]] to i32 +// SIMD-ONLY0-NEXT: [[TMP376:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV906:%.*]] = sext i8 [[TMP376]] to i32 +// SIMD-ONLY0-NEXT: [[CMP907:%.*]] = icmp eq i32 [[CONV905]], [[CONV906]] +// SIMD-ONLY0-NEXT: br i1 [[CMP907]], label [[IF_THEN909:%.*]], label [[IF_END910:%.*]] +// SIMD-ONLY0: if.then909: +// SIMD-ONLY0-NEXT: [[TMP377:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP377]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END910]] +// SIMD-ONLY0: if.end910: +// SIMD-ONLY0-NEXT: [[TMP378:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV911:%.*]] = zext i8 [[TMP378]] to i32 +// SIMD-ONLY0-NEXT: [[TMP379:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV912:%.*]] = zext i8 [[TMP379]] to i32 +// SIMD-ONLY0-NEXT: [[CMP913:%.*]] = icmp sgt i32 [[CONV911]], [[CONV912]] +// SIMD-ONLY0-NEXT: br i1 [[CMP913]], label [[COND_TRUE915:%.*]], label [[COND_FALSE917:%.*]] +// SIMD-ONLY0: cond.true915: +// SIMD-ONLY0-NEXT: [[TMP380:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV916:%.*]] = zext i8 [[TMP380]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END919:%.*]] +// SIMD-ONLY0: cond.false917: +// SIMD-ONLY0-NEXT: [[TMP381:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV918:%.*]] = zext i8 [[TMP381]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END919]] +// SIMD-ONLY0: cond.end919: +// SIMD-ONLY0-NEXT: [[COND920:%.*]] = phi i32 [ [[CONV916]], [[COND_TRUE915]] ], [ [[CONV918]], [[COND_FALSE917]] ] +// 
SIMD-ONLY0-NEXT: [[CONV921:%.*]] = trunc i32 [[COND920]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV921]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP382:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV922:%.*]] = zext i8 [[TMP382]] to i32 +// SIMD-ONLY0-NEXT: [[TMP383:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV923:%.*]] = zext i8 [[TMP383]] to i32 +// SIMD-ONLY0-NEXT: [[CMP924:%.*]] = icmp slt i32 [[CONV922]], [[CONV923]] +// SIMD-ONLY0-NEXT: br i1 [[CMP924]], label [[COND_TRUE926:%.*]], label [[COND_FALSE928:%.*]] +// SIMD-ONLY0: cond.true926: +// SIMD-ONLY0-NEXT: [[TMP384:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV927:%.*]] = zext i8 [[TMP384]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END930:%.*]] +// SIMD-ONLY0: cond.false928: +// SIMD-ONLY0-NEXT: [[TMP385:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV929:%.*]] = zext i8 [[TMP385]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END930]] +// SIMD-ONLY0: cond.end930: +// SIMD-ONLY0-NEXT: [[COND931:%.*]] = phi i32 [ [[CONV927]], [[COND_TRUE926]] ], [ [[CONV929]], [[COND_FALSE928]] ] +// SIMD-ONLY0-NEXT: [[CONV932:%.*]] = trunc i32 [[COND931]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV932]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP386:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV933:%.*]] = zext i8 [[TMP386]] to i32 +// SIMD-ONLY0-NEXT: [[TMP387:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV934:%.*]] = zext i8 [[TMP387]] to i32 +// SIMD-ONLY0-NEXT: [[CMP935:%.*]] = icmp sgt i32 [[CONV933]], [[CONV934]] +// SIMD-ONLY0-NEXT: br i1 [[CMP935]], label [[COND_TRUE937:%.*]], label [[COND_FALSE939:%.*]] +// SIMD-ONLY0: cond.true937: +// SIMD-ONLY0-NEXT: [[TMP388:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV938:%.*]] = zext i8 [[TMP388]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END941:%.*]] +// SIMD-ONLY0: cond.false939: +// SIMD-ONLY0-NEXT: [[TMP389:%.*]] = load i8, ptr 
[[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV940:%.*]] = zext i8 [[TMP389]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END941]] +// SIMD-ONLY0: cond.end941: +// SIMD-ONLY0-NEXT: [[COND942:%.*]] = phi i32 [ [[CONV938]], [[COND_TRUE937]] ], [ [[CONV940]], [[COND_FALSE939]] ] +// SIMD-ONLY0-NEXT: [[CONV943:%.*]] = trunc i32 [[COND942]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV943]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP390:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV944:%.*]] = zext i8 [[TMP390]] to i32 +// SIMD-ONLY0-NEXT: [[TMP391:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV945:%.*]] = zext i8 [[TMP391]] to i32 +// SIMD-ONLY0-NEXT: [[CMP946:%.*]] = icmp slt i32 [[CONV944]], [[CONV945]] +// SIMD-ONLY0-NEXT: br i1 [[CMP946]], label [[COND_TRUE948:%.*]], label [[COND_FALSE950:%.*]] +// SIMD-ONLY0: cond.true948: +// SIMD-ONLY0-NEXT: [[TMP392:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV949:%.*]] = zext i8 [[TMP392]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END952:%.*]] +// SIMD-ONLY0: cond.false950: +// SIMD-ONLY0-NEXT: [[TMP393:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV951:%.*]] = zext i8 [[TMP393]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END952]] +// SIMD-ONLY0: cond.end952: +// SIMD-ONLY0-NEXT: [[COND953:%.*]] = phi i32 [ [[CONV949]], [[COND_TRUE948]] ], [ [[CONV951]], [[COND_FALSE950]] ] +// SIMD-ONLY0-NEXT: [[CONV954:%.*]] = trunc i32 [[COND953]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV954]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP394:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV955:%.*]] = zext i8 [[TMP394]] to i32 +// SIMD-ONLY0-NEXT: [[TMP395:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV956:%.*]] = zext i8 [[TMP395]] to i32 +// SIMD-ONLY0-NEXT: [[CMP957:%.*]] = icmp sgt i32 [[CONV955]], [[CONV956]] +// SIMD-ONLY0-NEXT: br i1 [[CMP957]], label [[IF_THEN959:%.*]], label [[IF_END960:%.*]] +// SIMD-ONLY0: 
if.then959: +// SIMD-ONLY0-NEXT: [[TMP396:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP396]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END960]] +// SIMD-ONLY0: if.end960: +// SIMD-ONLY0-NEXT: [[TMP397:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV961:%.*]] = zext i8 [[TMP397]] to i32 +// SIMD-ONLY0-NEXT: [[TMP398:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV962:%.*]] = zext i8 [[TMP398]] to i32 +// SIMD-ONLY0-NEXT: [[CMP963:%.*]] = icmp slt i32 [[CONV961]], [[CONV962]] +// SIMD-ONLY0-NEXT: br i1 [[CMP963]], label [[IF_THEN965:%.*]], label [[IF_END966:%.*]] +// SIMD-ONLY0: if.then965: +// SIMD-ONLY0-NEXT: [[TMP399:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP399]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END966]] +// SIMD-ONLY0: if.end966: +// SIMD-ONLY0-NEXT: [[TMP400:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV967:%.*]] = zext i8 [[TMP400]] to i32 +// SIMD-ONLY0-NEXT: [[TMP401:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV968:%.*]] = zext i8 [[TMP401]] to i32 +// SIMD-ONLY0-NEXT: [[CMP969:%.*]] = icmp sgt i32 [[CONV967]], [[CONV968]] +// SIMD-ONLY0-NEXT: br i1 [[CMP969]], label [[IF_THEN971:%.*]], label [[IF_END972:%.*]] +// SIMD-ONLY0: if.then971: +// SIMD-ONLY0-NEXT: [[TMP402:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP402]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END972]] +// SIMD-ONLY0: if.end972: +// SIMD-ONLY0-NEXT: [[TMP403:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV973:%.*]] = zext i8 [[TMP403]] to i32 +// SIMD-ONLY0-NEXT: [[TMP404:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV974:%.*]] = zext i8 [[TMP404]] to i32 +// SIMD-ONLY0-NEXT: [[CMP975:%.*]] = icmp slt i32 [[CONV973]], [[CONV974]] +// SIMD-ONLY0-NEXT: br i1 [[CMP975]], label [[IF_THEN977:%.*]], label [[IF_END978:%.*]] +// SIMD-ONLY0: if.then977: +// 
SIMD-ONLY0-NEXT: [[TMP405:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP405]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END978]] +// SIMD-ONLY0: if.end978: +// SIMD-ONLY0-NEXT: [[TMP406:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV979:%.*]] = zext i8 [[TMP406]] to i32 +// SIMD-ONLY0-NEXT: [[TMP407:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV980:%.*]] = zext i8 [[TMP407]] to i32 +// SIMD-ONLY0-NEXT: [[CMP981:%.*]] = icmp eq i32 [[CONV979]], [[CONV980]] +// SIMD-ONLY0-NEXT: br i1 [[CMP981]], label [[COND_TRUE983:%.*]], label [[COND_FALSE985:%.*]] +// SIMD-ONLY0: cond.true983: +// SIMD-ONLY0-NEXT: [[TMP408:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV984:%.*]] = zext i8 [[TMP408]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END987:%.*]] +// SIMD-ONLY0: cond.false985: +// SIMD-ONLY0-NEXT: [[TMP409:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV986:%.*]] = zext i8 [[TMP409]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END987]] +// SIMD-ONLY0: cond.end987: +// SIMD-ONLY0-NEXT: [[COND988:%.*]] = phi i32 [ [[CONV984]], [[COND_TRUE983]] ], [ [[CONV986]], [[COND_FALSE985]] ] +// SIMD-ONLY0-NEXT: [[CONV989:%.*]] = trunc i32 [[COND988]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV989]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP410:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV990:%.*]] = zext i8 [[TMP410]] to i32 +// SIMD-ONLY0-NEXT: [[TMP411:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV991:%.*]] = zext i8 [[TMP411]] to i32 +// SIMD-ONLY0-NEXT: [[CMP992:%.*]] = icmp eq i32 [[CONV990]], [[CONV991]] +// SIMD-ONLY0-NEXT: br i1 [[CMP992]], label [[COND_TRUE994:%.*]], label [[COND_FALSE996:%.*]] +// SIMD-ONLY0: cond.true994: +// SIMD-ONLY0-NEXT: [[TMP412:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV995:%.*]] = zext i8 [[TMP412]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END998:%.*]] +// SIMD-ONLY0: 
cond.false996: +// SIMD-ONLY0-NEXT: [[TMP413:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV997:%.*]] = zext i8 [[TMP413]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END998]] +// SIMD-ONLY0: cond.end998: +// SIMD-ONLY0-NEXT: [[COND999:%.*]] = phi i32 [ [[CONV995]], [[COND_TRUE994]] ], [ [[CONV997]], [[COND_FALSE996]] ] +// SIMD-ONLY0-NEXT: [[CONV1000:%.*]] = trunc i32 [[COND999]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1000]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP414:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1001:%.*]] = zext i8 [[TMP414]] to i32 +// SIMD-ONLY0-NEXT: [[TMP415:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1002:%.*]] = zext i8 [[TMP415]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1003:%.*]] = icmp eq i32 [[CONV1001]], [[CONV1002]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1003]], label [[IF_THEN1005:%.*]], label [[IF_END1006:%.*]] +// SIMD-ONLY0: if.then1005: +// SIMD-ONLY0-NEXT: [[TMP416:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP416]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1006]] +// SIMD-ONLY0: if.end1006: +// SIMD-ONLY0-NEXT: [[TMP417:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1007:%.*]] = zext i8 [[TMP417]] to i32 +// SIMD-ONLY0-NEXT: [[TMP418:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1008:%.*]] = zext i8 [[TMP418]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1009:%.*]] = icmp eq i32 [[CONV1007]], [[CONV1008]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1009]], label [[IF_THEN1011:%.*]], label [[IF_END1012:%.*]] +// SIMD-ONLY0: if.then1011: +// SIMD-ONLY0-NEXT: [[TMP419:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP419]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1012]] +// SIMD-ONLY0: if.end1012: +// SIMD-ONLY0-NEXT: [[TMP420:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1013:%.*]] = sext i8 [[TMP420]] to i32 +// SIMD-ONLY0-NEXT: [[TMP421:%.*]] = load i8, ptr 
[[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1014:%.*]] = sext i8 [[TMP421]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1015:%.*]] = icmp sgt i32 [[CONV1013]], [[CONV1014]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1015]], label [[COND_TRUE1017:%.*]], label [[COND_FALSE1019:%.*]] +// SIMD-ONLY0: cond.true1017: +// SIMD-ONLY0-NEXT: [[TMP422:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1018:%.*]] = sext i8 [[TMP422]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1021:%.*]] +// SIMD-ONLY0: cond.false1019: +// SIMD-ONLY0-NEXT: [[TMP423:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1020:%.*]] = sext i8 [[TMP423]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1021]] +// SIMD-ONLY0: cond.end1021: +// SIMD-ONLY0-NEXT: [[COND1022:%.*]] = phi i32 [ [[CONV1018]], [[COND_TRUE1017]] ], [ [[CONV1020]], [[COND_FALSE1019]] ] +// SIMD-ONLY0-NEXT: [[CONV1023:%.*]] = trunc i32 [[COND1022]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1023]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP424:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1024:%.*]] = sext i8 [[TMP424]] to i32 +// SIMD-ONLY0-NEXT: [[TMP425:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1025:%.*]] = sext i8 [[TMP425]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1026:%.*]] = icmp slt i32 [[CONV1024]], [[CONV1025]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1026]], label [[COND_TRUE1028:%.*]], label [[COND_FALSE1030:%.*]] +// SIMD-ONLY0: cond.true1028: +// SIMD-ONLY0-NEXT: [[TMP426:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1029:%.*]] = sext i8 [[TMP426]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1032:%.*]] +// SIMD-ONLY0: cond.false1030: +// SIMD-ONLY0-NEXT: [[TMP427:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1031:%.*]] = sext i8 [[TMP427]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1032]] +// SIMD-ONLY0: cond.end1032: +// SIMD-ONLY0-NEXT: [[COND1033:%.*]] = phi i32 [ [[CONV1029]], [[COND_TRUE1028]] ], [ [[CONV1031]], [[COND_FALSE1030]] ] +// 
SIMD-ONLY0-NEXT: [[CONV1034:%.*]] = trunc i32 [[COND1033]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1034]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP428:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1035:%.*]] = sext i8 [[TMP428]] to i32 +// SIMD-ONLY0-NEXT: [[TMP429:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1036:%.*]] = sext i8 [[TMP429]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1037:%.*]] = icmp sgt i32 [[CONV1035]], [[CONV1036]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1037]], label [[COND_TRUE1039:%.*]], label [[COND_FALSE1041:%.*]] +// SIMD-ONLY0: cond.true1039: +// SIMD-ONLY0-NEXT: [[TMP430:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1040:%.*]] = sext i8 [[TMP430]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1043:%.*]] +// SIMD-ONLY0: cond.false1041: +// SIMD-ONLY0-NEXT: [[TMP431:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1042:%.*]] = sext i8 [[TMP431]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1043]] +// SIMD-ONLY0: cond.end1043: +// SIMD-ONLY0-NEXT: [[COND1044:%.*]] = phi i32 [ [[CONV1040]], [[COND_TRUE1039]] ], [ [[CONV1042]], [[COND_FALSE1041]] ] +// SIMD-ONLY0-NEXT: [[CONV1045:%.*]] = trunc i32 [[COND1044]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1045]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP432:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1046:%.*]] = sext i8 [[TMP432]] to i32 +// SIMD-ONLY0-NEXT: [[TMP433:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1047:%.*]] = sext i8 [[TMP433]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1048:%.*]] = icmp slt i32 [[CONV1046]], [[CONV1047]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1048]], label [[COND_TRUE1050:%.*]], label [[COND_FALSE1052:%.*]] +// SIMD-ONLY0: cond.true1050: +// SIMD-ONLY0-NEXT: [[TMP434:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1051:%.*]] = sext i8 [[TMP434]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1054:%.*]] +// SIMD-ONLY0: cond.false1052: +// SIMD-ONLY0-NEXT: 
[[TMP435:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1053:%.*]] = sext i8 [[TMP435]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1054]] +// SIMD-ONLY0: cond.end1054: +// SIMD-ONLY0-NEXT: [[COND1055:%.*]] = phi i32 [ [[CONV1051]], [[COND_TRUE1050]] ], [ [[CONV1053]], [[COND_FALSE1052]] ] +// SIMD-ONLY0-NEXT: [[CONV1056:%.*]] = trunc i32 [[COND1055]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1056]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP436:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1057:%.*]] = sext i8 [[TMP436]] to i32 +// SIMD-ONLY0-NEXT: [[TMP437:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1058:%.*]] = sext i8 [[TMP437]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1059:%.*]] = icmp sgt i32 [[CONV1057]], [[CONV1058]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1059]], label [[IF_THEN1061:%.*]], label [[IF_END1062:%.*]] +// SIMD-ONLY0: if.then1061: +// SIMD-ONLY0-NEXT: [[TMP438:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP438]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1062]] +// SIMD-ONLY0: if.end1062: +// SIMD-ONLY0-NEXT: [[TMP439:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1063:%.*]] = sext i8 [[TMP439]] to i32 +// SIMD-ONLY0-NEXT: [[TMP440:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1064:%.*]] = sext i8 [[TMP440]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1065:%.*]] = icmp slt i32 [[CONV1063]], [[CONV1064]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1065]], label [[IF_THEN1067:%.*]], label [[IF_END1068:%.*]] +// SIMD-ONLY0: if.then1067: +// SIMD-ONLY0-NEXT: [[TMP441:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP441]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1068]] +// SIMD-ONLY0: if.end1068: +// SIMD-ONLY0-NEXT: [[TMP442:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1069:%.*]] = sext i8 [[TMP442]] to i32 +// SIMD-ONLY0-NEXT: [[TMP443:%.*]] = load i8, ptr [[CX]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV1070:%.*]] = sext i8 [[TMP443]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1071:%.*]] = icmp sgt i32 [[CONV1069]], [[CONV1070]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1071]], label [[IF_THEN1073:%.*]], label [[IF_END1074:%.*]] +// SIMD-ONLY0: if.then1073: +// SIMD-ONLY0-NEXT: [[TMP444:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP444]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1074]] +// SIMD-ONLY0: if.end1074: +// SIMD-ONLY0-NEXT: [[TMP445:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1075:%.*]] = sext i8 [[TMP445]] to i32 +// SIMD-ONLY0-NEXT: [[TMP446:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1076:%.*]] = sext i8 [[TMP446]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1077:%.*]] = icmp slt i32 [[CONV1075]], [[CONV1076]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1077]], label [[IF_THEN1079:%.*]], label [[IF_END1080:%.*]] +// SIMD-ONLY0: if.then1079: +// SIMD-ONLY0-NEXT: [[TMP447:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP447]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1080]] +// SIMD-ONLY0: if.end1080: +// SIMD-ONLY0-NEXT: [[TMP448:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1081:%.*]] = sext i8 [[TMP448]] to i32 +// SIMD-ONLY0-NEXT: [[TMP449:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1082:%.*]] = sext i8 [[TMP449]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1083:%.*]] = icmp eq i32 [[CONV1081]], [[CONV1082]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1083]], label [[COND_TRUE1085:%.*]], label [[COND_FALSE1087:%.*]] +// SIMD-ONLY0: cond.true1085: +// SIMD-ONLY0-NEXT: [[TMP450:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1086:%.*]] = sext i8 [[TMP450]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1089:%.*]] +// SIMD-ONLY0: cond.false1087: +// SIMD-ONLY0-NEXT: [[TMP451:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1088:%.*]] = sext i8 [[TMP451]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END1089]] +// SIMD-ONLY0: cond.end1089: +// SIMD-ONLY0-NEXT: [[COND1090:%.*]] = phi i32 [ [[CONV1086]], [[COND_TRUE1085]] ], [ [[CONV1088]], [[COND_FALSE1087]] ] +// SIMD-ONLY0-NEXT: [[CONV1091:%.*]] = trunc i32 [[COND1090]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1091]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP452:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1092:%.*]] = sext i8 [[TMP452]] to i32 +// SIMD-ONLY0-NEXT: [[TMP453:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1093:%.*]] = sext i8 [[TMP453]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1094:%.*]] = icmp eq i32 [[CONV1092]], [[CONV1093]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1094]], label [[COND_TRUE1096:%.*]], label [[COND_FALSE1098:%.*]] +// SIMD-ONLY0: cond.true1096: +// SIMD-ONLY0-NEXT: [[TMP454:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1097:%.*]] = sext i8 [[TMP454]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1100:%.*]] +// SIMD-ONLY0: cond.false1098: +// SIMD-ONLY0-NEXT: [[TMP455:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1099:%.*]] = sext i8 [[TMP455]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1100]] +// SIMD-ONLY0: cond.end1100: +// SIMD-ONLY0-NEXT: [[COND1101:%.*]] = phi i32 [ [[CONV1097]], [[COND_TRUE1096]] ], [ [[CONV1099]], [[COND_FALSE1098]] ] +// SIMD-ONLY0-NEXT: [[CONV1102:%.*]] = trunc i32 [[COND1101]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1102]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP456:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1103:%.*]] = sext i8 [[TMP456]] to i32 +// SIMD-ONLY0-NEXT: [[TMP457:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1104:%.*]] = sext i8 [[TMP457]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1105:%.*]] = icmp eq i32 [[CONV1103]], [[CONV1104]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1105]], label [[IF_THEN1107:%.*]], label [[IF_END1108:%.*]] +// SIMD-ONLY0: if.then1107: +// SIMD-ONLY0-NEXT: [[TMP458:%.*]] = load i8, ptr [[CD]], align 1 +// 
SIMD-ONLY0-NEXT: store i8 [[TMP458]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1108]] +// SIMD-ONLY0: if.end1108: +// SIMD-ONLY0-NEXT: [[TMP459:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1109:%.*]] = sext i8 [[TMP459]] to i32 +// SIMD-ONLY0-NEXT: [[TMP460:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1110:%.*]] = sext i8 [[TMP460]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1111:%.*]] = icmp eq i32 [[CONV1109]], [[CONV1110]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1111]], label [[IF_THEN1113:%.*]], label [[IF_END1114:%.*]] +// SIMD-ONLY0: if.then1113: +// SIMD-ONLY0-NEXT: [[TMP461:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP461]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1114]] +// SIMD-ONLY0: if.end1114: +// SIMD-ONLY0-NEXT: [[TMP462:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1115:%.*]] = zext i8 [[TMP462]] to i32 +// SIMD-ONLY0-NEXT: [[TMP463:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1116:%.*]] = zext i8 [[TMP463]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1117:%.*]] = icmp sgt i32 [[CONV1115]], [[CONV1116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1117]], label [[COND_TRUE1119:%.*]], label [[COND_FALSE1121:%.*]] +// SIMD-ONLY0: cond.true1119: +// SIMD-ONLY0-NEXT: [[TMP464:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1120:%.*]] = zext i8 [[TMP464]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1123:%.*]] +// SIMD-ONLY0: cond.false1121: +// SIMD-ONLY0-NEXT: [[TMP465:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1122:%.*]] = zext i8 [[TMP465]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1123]] +// SIMD-ONLY0: cond.end1123: +// SIMD-ONLY0-NEXT: [[COND1124:%.*]] = phi i32 [ [[CONV1120]], [[COND_TRUE1119]] ], [ [[CONV1122]], [[COND_FALSE1121]] ] +// SIMD-ONLY0-NEXT: [[CONV1125:%.*]] = trunc i32 [[COND1124]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1125]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP466:%.*]] = 
load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1126:%.*]] = zext i8 [[TMP466]] to i32 +// SIMD-ONLY0-NEXT: [[TMP467:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1127:%.*]] = zext i8 [[TMP467]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1128:%.*]] = icmp slt i32 [[CONV1126]], [[CONV1127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1128]], label [[COND_TRUE1130:%.*]], label [[COND_FALSE1132:%.*]] +// SIMD-ONLY0: cond.true1130: +// SIMD-ONLY0-NEXT: [[TMP468:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1131:%.*]] = zext i8 [[TMP468]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1134:%.*]] +// SIMD-ONLY0: cond.false1132: +// SIMD-ONLY0-NEXT: [[TMP469:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1133:%.*]] = zext i8 [[TMP469]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1134]] +// SIMD-ONLY0: cond.end1134: +// SIMD-ONLY0-NEXT: [[COND1135:%.*]] = phi i32 [ [[CONV1131]], [[COND_TRUE1130]] ], [ [[CONV1133]], [[COND_FALSE1132]] ] +// SIMD-ONLY0-NEXT: [[CONV1136:%.*]] = trunc i32 [[COND1135]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1136]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP470:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1137:%.*]] = zext i8 [[TMP470]] to i32 +// SIMD-ONLY0-NEXT: [[TMP471:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1138:%.*]] = zext i8 [[TMP471]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1139:%.*]] = icmp sgt i32 [[CONV1137]], [[CONV1138]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1139]], label [[COND_TRUE1141:%.*]], label [[COND_FALSE1143:%.*]] +// SIMD-ONLY0: cond.true1141: +// SIMD-ONLY0-NEXT: [[TMP472:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1142:%.*]] = zext i8 [[TMP472]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1145:%.*]] +// SIMD-ONLY0: cond.false1143: +// SIMD-ONLY0-NEXT: [[TMP473:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1144:%.*]] = zext i8 [[TMP473]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1145]] +// 
SIMD-ONLY0: cond.end1145: +// SIMD-ONLY0-NEXT: [[COND1146:%.*]] = phi i32 [ [[CONV1142]], [[COND_TRUE1141]] ], [ [[CONV1144]], [[COND_FALSE1143]] ] +// SIMD-ONLY0-NEXT: [[CONV1147:%.*]] = trunc i32 [[COND1146]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1147]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP474:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1148:%.*]] = zext i8 [[TMP474]] to i32 +// SIMD-ONLY0-NEXT: [[TMP475:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1149:%.*]] = zext i8 [[TMP475]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1150:%.*]] = icmp slt i32 [[CONV1148]], [[CONV1149]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1150]], label [[COND_TRUE1152:%.*]], label [[COND_FALSE1154:%.*]] +// SIMD-ONLY0: cond.true1152: +// SIMD-ONLY0-NEXT: [[TMP476:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1153:%.*]] = zext i8 [[TMP476]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1156:%.*]] +// SIMD-ONLY0: cond.false1154: +// SIMD-ONLY0-NEXT: [[TMP477:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1155:%.*]] = zext i8 [[TMP477]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1156]] +// SIMD-ONLY0: cond.end1156: +// SIMD-ONLY0-NEXT: [[COND1157:%.*]] = phi i32 [ [[CONV1153]], [[COND_TRUE1152]] ], [ [[CONV1155]], [[COND_FALSE1154]] ] +// SIMD-ONLY0-NEXT: [[CONV1158:%.*]] = trunc i32 [[COND1157]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1158]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP478:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1159:%.*]] = zext i8 [[TMP478]] to i32 +// SIMD-ONLY0-NEXT: [[TMP479:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1160:%.*]] = zext i8 [[TMP479]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1161:%.*]] = icmp sgt i32 [[CONV1159]], [[CONV1160]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1161]], label [[IF_THEN1163:%.*]], label [[IF_END1164:%.*]] +// SIMD-ONLY0: if.then1163: +// SIMD-ONLY0-NEXT: [[TMP480:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: 
store i8 [[TMP480]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1164]] +// SIMD-ONLY0: if.end1164: +// SIMD-ONLY0-NEXT: [[TMP481:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1165:%.*]] = zext i8 [[TMP481]] to i32 +// SIMD-ONLY0-NEXT: [[TMP482:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1166:%.*]] = zext i8 [[TMP482]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1167:%.*]] = icmp slt i32 [[CONV1165]], [[CONV1166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1167]], label [[IF_THEN1169:%.*]], label [[IF_END1170:%.*]] +// SIMD-ONLY0: if.then1169: +// SIMD-ONLY0-NEXT: [[TMP483:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP483]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1170]] +// SIMD-ONLY0: if.end1170: +// SIMD-ONLY0-NEXT: [[TMP484:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1171:%.*]] = zext i8 [[TMP484]] to i32 +// SIMD-ONLY0-NEXT: [[TMP485:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1172:%.*]] = zext i8 [[TMP485]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1173:%.*]] = icmp sgt i32 [[CONV1171]], [[CONV1172]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1173]], label [[IF_THEN1175:%.*]], label [[IF_END1176:%.*]] +// SIMD-ONLY0: if.then1175: +// SIMD-ONLY0-NEXT: [[TMP486:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP486]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1176]] +// SIMD-ONLY0: if.end1176: +// SIMD-ONLY0-NEXT: [[TMP487:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1177:%.*]] = zext i8 [[TMP487]] to i32 +// SIMD-ONLY0-NEXT: [[TMP488:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1178:%.*]] = zext i8 [[TMP488]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1179:%.*]] = icmp slt i32 [[CONV1177]], [[CONV1178]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1179]], label [[IF_THEN1181:%.*]], label [[IF_END1182:%.*]] +// SIMD-ONLY0: if.then1181: +// SIMD-ONLY0-NEXT: [[TMP489:%.*]] = load i8, ptr [[UCE]], align 1 +// 
SIMD-ONLY0-NEXT: store i8 [[TMP489]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1182]] +// SIMD-ONLY0: if.end1182: +// SIMD-ONLY0-NEXT: [[TMP490:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1183:%.*]] = zext i8 [[TMP490]] to i32 +// SIMD-ONLY0-NEXT: [[TMP491:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1184:%.*]] = zext i8 [[TMP491]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1185:%.*]] = icmp eq i32 [[CONV1183]], [[CONV1184]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1185]], label [[COND_TRUE1187:%.*]], label [[COND_FALSE1189:%.*]] +// SIMD-ONLY0: cond.true1187: +// SIMD-ONLY0-NEXT: [[TMP492:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1188:%.*]] = zext i8 [[TMP492]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1191:%.*]] +// SIMD-ONLY0: cond.false1189: +// SIMD-ONLY0-NEXT: [[TMP493:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1190:%.*]] = zext i8 [[TMP493]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1191]] +// SIMD-ONLY0: cond.end1191: +// SIMD-ONLY0-NEXT: [[COND1192:%.*]] = phi i32 [ [[CONV1188]], [[COND_TRUE1187]] ], [ [[CONV1190]], [[COND_FALSE1189]] ] +// SIMD-ONLY0-NEXT: [[CONV1193:%.*]] = trunc i32 [[COND1192]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1193]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP494:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1194:%.*]] = zext i8 [[TMP494]] to i32 +// SIMD-ONLY0-NEXT: [[TMP495:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1195:%.*]] = zext i8 [[TMP495]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1196:%.*]] = icmp eq i32 [[CONV1194]], [[CONV1195]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1196]], label [[COND_TRUE1198:%.*]], label [[COND_FALSE1200:%.*]] +// SIMD-ONLY0: cond.true1198: +// SIMD-ONLY0-NEXT: [[TMP496:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1199:%.*]] = zext i8 [[TMP496]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1202:%.*]] +// SIMD-ONLY0: cond.false1200: +// 
SIMD-ONLY0-NEXT: [[TMP497:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1201:%.*]] = zext i8 [[TMP497]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1202]] +// SIMD-ONLY0: cond.end1202: +// SIMD-ONLY0-NEXT: [[COND1203:%.*]] = phi i32 [ [[CONV1199]], [[COND_TRUE1198]] ], [ [[CONV1201]], [[COND_FALSE1200]] ] +// SIMD-ONLY0-NEXT: [[CONV1204:%.*]] = trunc i32 [[COND1203]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1204]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP498:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1205:%.*]] = zext i8 [[TMP498]] to i32 +// SIMD-ONLY0-NEXT: [[TMP499:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1206:%.*]] = zext i8 [[TMP499]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1207:%.*]] = icmp eq i32 [[CONV1205]], [[CONV1206]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1207]], label [[IF_THEN1209:%.*]], label [[IF_END1210:%.*]] +// SIMD-ONLY0: if.then1209: +// SIMD-ONLY0-NEXT: [[TMP500:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP500]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1210]] +// SIMD-ONLY0: if.end1210: +// SIMD-ONLY0-NEXT: [[TMP501:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1211:%.*]] = zext i8 [[TMP501]] to i32 +// SIMD-ONLY0-NEXT: [[TMP502:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1212:%.*]] = zext i8 [[TMP502]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1213:%.*]] = icmp eq i32 [[CONV1211]], [[CONV1212]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1213]], label [[IF_THEN1215:%.*]], label [[IF_END1216:%.*]] +// SIMD-ONLY0: if.then1215: +// SIMD-ONLY0-NEXT: [[TMP503:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP503]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1216]] +// SIMD-ONLY0: if.end1216: +// SIMD-ONLY0-NEXT: [[TMP504:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1217:%.*]] = sext i16 [[TMP504]] to i32 +// SIMD-ONLY0-NEXT: [[TMP505:%.*]] = load i16, ptr [[SE]], 
align 2 +// SIMD-ONLY0-NEXT: [[CONV1218:%.*]] = sext i16 [[TMP505]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1219:%.*]] = icmp sgt i32 [[CONV1217]], [[CONV1218]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1219]], label [[COND_TRUE1221:%.*]], label [[COND_FALSE1223:%.*]] +// SIMD-ONLY0: cond.true1221: +// SIMD-ONLY0-NEXT: [[TMP506:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1222:%.*]] = sext i16 [[TMP506]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1225:%.*]] +// SIMD-ONLY0: cond.false1223: +// SIMD-ONLY0-NEXT: [[TMP507:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1224:%.*]] = sext i16 [[TMP507]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1225]] +// SIMD-ONLY0: cond.end1225: +// SIMD-ONLY0-NEXT: [[COND1226:%.*]] = phi i32 [ [[CONV1222]], [[COND_TRUE1221]] ], [ [[CONV1224]], [[COND_FALSE1223]] ] +// SIMD-ONLY0-NEXT: [[CONV1227:%.*]] = trunc i32 [[COND1226]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1227]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP508:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1228:%.*]] = sext i16 [[TMP508]] to i32 +// SIMD-ONLY0-NEXT: [[TMP509:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1229:%.*]] = sext i16 [[TMP509]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1230:%.*]] = icmp slt i32 [[CONV1228]], [[CONV1229]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1230]], label [[COND_TRUE1232:%.*]], label [[COND_FALSE1234:%.*]] +// SIMD-ONLY0: cond.true1232: +// SIMD-ONLY0-NEXT: [[TMP510:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1233:%.*]] = sext i16 [[TMP510]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1236:%.*]] +// SIMD-ONLY0: cond.false1234: +// SIMD-ONLY0-NEXT: [[TMP511:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1235:%.*]] = sext i16 [[TMP511]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1236]] +// SIMD-ONLY0: cond.end1236: +// SIMD-ONLY0-NEXT: [[COND1237:%.*]] = phi i32 [ [[CONV1233]], [[COND_TRUE1232]] ], [ [[CONV1235]], [[COND_FALSE1234]] ] 
+// SIMD-ONLY0-NEXT: [[CONV1238:%.*]] = trunc i32 [[COND1237]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1238]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP512:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1239:%.*]] = sext i16 [[TMP512]] to i32 +// SIMD-ONLY0-NEXT: [[TMP513:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1240:%.*]] = sext i16 [[TMP513]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1241:%.*]] = icmp sgt i32 [[CONV1239]], [[CONV1240]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1241]], label [[COND_TRUE1243:%.*]], label [[COND_FALSE1245:%.*]] +// SIMD-ONLY0: cond.true1243: +// SIMD-ONLY0-NEXT: [[TMP514:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1244:%.*]] = sext i16 [[TMP514]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1247:%.*]] +// SIMD-ONLY0: cond.false1245: +// SIMD-ONLY0-NEXT: [[TMP515:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1246:%.*]] = sext i16 [[TMP515]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1247]] +// SIMD-ONLY0: cond.end1247: +// SIMD-ONLY0-NEXT: [[COND1248:%.*]] = phi i32 [ [[CONV1244]], [[COND_TRUE1243]] ], [ [[CONV1246]], [[COND_FALSE1245]] ] +// SIMD-ONLY0-NEXT: [[CONV1249:%.*]] = trunc i32 [[COND1248]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1249]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP516:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1250:%.*]] = sext i16 [[TMP516]] to i32 +// SIMD-ONLY0-NEXT: [[TMP517:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1251:%.*]] = sext i16 [[TMP517]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1252:%.*]] = icmp slt i32 [[CONV1250]], [[CONV1251]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1252]], label [[COND_TRUE1254:%.*]], label [[COND_FALSE1256:%.*]] +// SIMD-ONLY0: cond.true1254: +// SIMD-ONLY0-NEXT: [[TMP518:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1255:%.*]] = sext i16 [[TMP518]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1258:%.*]] +// SIMD-ONLY0: cond.false1256: +// 
SIMD-ONLY0-NEXT: [[TMP519:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1257:%.*]] = sext i16 [[TMP519]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1258]] +// SIMD-ONLY0: cond.end1258: +// SIMD-ONLY0-NEXT: [[COND1259:%.*]] = phi i32 [ [[CONV1255]], [[COND_TRUE1254]] ], [ [[CONV1257]], [[COND_FALSE1256]] ] +// SIMD-ONLY0-NEXT: [[CONV1260:%.*]] = trunc i32 [[COND1259]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1260]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP520:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1261:%.*]] = sext i16 [[TMP520]] to i32 +// SIMD-ONLY0-NEXT: [[TMP521:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1262:%.*]] = sext i16 [[TMP521]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1263:%.*]] = icmp sgt i32 [[CONV1261]], [[CONV1262]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1263]], label [[IF_THEN1265:%.*]], label [[IF_END1266:%.*]] +// SIMD-ONLY0: if.then1265: +// SIMD-ONLY0-NEXT: [[TMP522:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP522]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1266]] +// SIMD-ONLY0: if.end1266: +// SIMD-ONLY0-NEXT: [[TMP523:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1267:%.*]] = sext i16 [[TMP523]] to i32 +// SIMD-ONLY0-NEXT: [[TMP524:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1268:%.*]] = sext i16 [[TMP524]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1269:%.*]] = icmp slt i32 [[CONV1267]], [[CONV1268]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1269]], label [[IF_THEN1271:%.*]], label [[IF_END1272:%.*]] +// SIMD-ONLY0: if.then1271: +// SIMD-ONLY0-NEXT: [[TMP525:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP525]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1272]] +// SIMD-ONLY0: if.end1272: +// SIMD-ONLY0-NEXT: [[TMP526:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1273:%.*]] = sext i16 [[TMP526]] to i32 +// SIMD-ONLY0-NEXT: [[TMP527:%.*]] = load i16, ptr 
[[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1274:%.*]] = sext i16 [[TMP527]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1275:%.*]] = icmp sgt i32 [[CONV1273]], [[CONV1274]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1275]], label [[IF_THEN1277:%.*]], label [[IF_END1278:%.*]] +// SIMD-ONLY0: if.then1277: +// SIMD-ONLY0-NEXT: [[TMP528:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP528]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1278]] +// SIMD-ONLY0: if.end1278: +// SIMD-ONLY0-NEXT: [[TMP529:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1279:%.*]] = sext i16 [[TMP529]] to i32 +// SIMD-ONLY0-NEXT: [[TMP530:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1280:%.*]] = sext i16 [[TMP530]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1281:%.*]] = icmp slt i32 [[CONV1279]], [[CONV1280]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1281]], label [[IF_THEN1283:%.*]], label [[IF_END1284:%.*]] +// SIMD-ONLY0: if.then1283: +// SIMD-ONLY0-NEXT: [[TMP531:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP531]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1284]] +// SIMD-ONLY0: if.end1284: +// SIMD-ONLY0-NEXT: [[TMP532:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1285:%.*]] = sext i16 [[TMP532]] to i32 +// SIMD-ONLY0-NEXT: [[TMP533:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1286:%.*]] = sext i16 [[TMP533]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1287:%.*]] = icmp eq i32 [[CONV1285]], [[CONV1286]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1287]], label [[COND_TRUE1289:%.*]], label [[COND_FALSE1291:%.*]] +// SIMD-ONLY0: cond.true1289: +// SIMD-ONLY0-NEXT: [[TMP534:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1290:%.*]] = sext i16 [[TMP534]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1293:%.*]] +// SIMD-ONLY0: cond.false1291: +// SIMD-ONLY0-NEXT: [[TMP535:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1292:%.*]] = sext i16 [[TMP535]] to i32 +// 
SIMD-ONLY0-NEXT: br label [[COND_END1293]] +// SIMD-ONLY0: cond.end1293: +// SIMD-ONLY0-NEXT: [[COND1294:%.*]] = phi i32 [ [[CONV1290]], [[COND_TRUE1289]] ], [ [[CONV1292]], [[COND_FALSE1291]] ] +// SIMD-ONLY0-NEXT: [[CONV1295:%.*]] = trunc i32 [[COND1294]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1295]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP536:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1296:%.*]] = sext i16 [[TMP536]] to i32 +// SIMD-ONLY0-NEXT: [[TMP537:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1297:%.*]] = sext i16 [[TMP537]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1298:%.*]] = icmp eq i32 [[CONV1296]], [[CONV1297]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1298]], label [[COND_TRUE1300:%.*]], label [[COND_FALSE1302:%.*]] +// SIMD-ONLY0: cond.true1300: +// SIMD-ONLY0-NEXT: [[TMP538:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1301:%.*]] = sext i16 [[TMP538]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1304:%.*]] +// SIMD-ONLY0: cond.false1302: +// SIMD-ONLY0-NEXT: [[TMP539:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1303:%.*]] = sext i16 [[TMP539]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1304]] +// SIMD-ONLY0: cond.end1304: +// SIMD-ONLY0-NEXT: [[COND1305:%.*]] = phi i32 [ [[CONV1301]], [[COND_TRUE1300]] ], [ [[CONV1303]], [[COND_FALSE1302]] ] +// SIMD-ONLY0-NEXT: [[CONV1306:%.*]] = trunc i32 [[COND1305]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1306]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP540:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1307:%.*]] = sext i16 [[TMP540]] to i32 +// SIMD-ONLY0-NEXT: [[TMP541:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1308:%.*]] = sext i16 [[TMP541]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1309:%.*]] = icmp eq i32 [[CONV1307]], [[CONV1308]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1309]], label [[IF_THEN1311:%.*]], label [[IF_END1312:%.*]] +// SIMD-ONLY0: if.then1311: +// SIMD-ONLY0-NEXT: [[TMP542:%.*]] 
= load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP542]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1312]] +// SIMD-ONLY0: if.end1312: +// SIMD-ONLY0-NEXT: [[TMP543:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1313:%.*]] = sext i16 [[TMP543]] to i32 +// SIMD-ONLY0-NEXT: [[TMP544:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1314:%.*]] = sext i16 [[TMP544]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1315:%.*]] = icmp eq i32 [[CONV1313]], [[CONV1314]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1315]], label [[IF_THEN1317:%.*]], label [[IF_END1318:%.*]] +// SIMD-ONLY0: if.then1317: +// SIMD-ONLY0-NEXT: [[TMP545:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP545]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1318]] +// SIMD-ONLY0: if.end1318: +// SIMD-ONLY0-NEXT: [[TMP546:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1319:%.*]] = zext i16 [[TMP546]] to i32 +// SIMD-ONLY0-NEXT: [[TMP547:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1320:%.*]] = zext i16 [[TMP547]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1321:%.*]] = icmp sgt i32 [[CONV1319]], [[CONV1320]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1321]], label [[COND_TRUE1323:%.*]], label [[COND_FALSE1325:%.*]] +// SIMD-ONLY0: cond.true1323: +// SIMD-ONLY0-NEXT: [[TMP548:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1324:%.*]] = zext i16 [[TMP548]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1327:%.*]] +// SIMD-ONLY0: cond.false1325: +// SIMD-ONLY0-NEXT: [[TMP549:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1326:%.*]] = zext i16 [[TMP549]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1327]] +// SIMD-ONLY0: cond.end1327: +// SIMD-ONLY0-NEXT: [[COND1328:%.*]] = phi i32 [ [[CONV1324]], [[COND_TRUE1323]] ], [ [[CONV1326]], [[COND_FALSE1325]] ] +// SIMD-ONLY0-NEXT: [[CONV1329:%.*]] = trunc i32 [[COND1328]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1329]], ptr 
[[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP550:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1330:%.*]] = zext i16 [[TMP550]] to i32 +// SIMD-ONLY0-NEXT: [[TMP551:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1331:%.*]] = zext i16 [[TMP551]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1332:%.*]] = icmp slt i32 [[CONV1330]], [[CONV1331]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1332]], label [[COND_TRUE1334:%.*]], label [[COND_FALSE1336:%.*]] +// SIMD-ONLY0: cond.true1334: +// SIMD-ONLY0-NEXT: [[TMP552:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1335:%.*]] = zext i16 [[TMP552]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1338:%.*]] +// SIMD-ONLY0: cond.false1336: +// SIMD-ONLY0-NEXT: [[TMP553:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1337:%.*]] = zext i16 [[TMP553]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1338]] +// SIMD-ONLY0: cond.end1338: +// SIMD-ONLY0-NEXT: [[COND1339:%.*]] = phi i32 [ [[CONV1335]], [[COND_TRUE1334]] ], [ [[CONV1337]], [[COND_FALSE1336]] ] +// SIMD-ONLY0-NEXT: [[CONV1340:%.*]] = trunc i32 [[COND1339]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1340]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP554:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1341:%.*]] = zext i16 [[TMP554]] to i32 +// SIMD-ONLY0-NEXT: [[TMP555:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1342:%.*]] = zext i16 [[TMP555]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1343:%.*]] = icmp sgt i32 [[CONV1341]], [[CONV1342]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1343]], label [[COND_TRUE1345:%.*]], label [[COND_FALSE1347:%.*]] +// SIMD-ONLY0: cond.true1345: +// SIMD-ONLY0-NEXT: [[TMP556:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1346:%.*]] = zext i16 [[TMP556]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1349:%.*]] +// SIMD-ONLY0: cond.false1347: +// SIMD-ONLY0-NEXT: [[TMP557:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1348:%.*]] = zext 
i16 [[TMP557]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1349]] +// SIMD-ONLY0: cond.end1349: +// SIMD-ONLY0-NEXT: [[COND1350:%.*]] = phi i32 [ [[CONV1346]], [[COND_TRUE1345]] ], [ [[CONV1348]], [[COND_FALSE1347]] ] +// SIMD-ONLY0-NEXT: [[CONV1351:%.*]] = trunc i32 [[COND1350]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1351]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP558:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1352:%.*]] = zext i16 [[TMP558]] to i32 +// SIMD-ONLY0-NEXT: [[TMP559:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1353:%.*]] = zext i16 [[TMP559]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1354:%.*]] = icmp slt i32 [[CONV1352]], [[CONV1353]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1354]], label [[COND_TRUE1356:%.*]], label [[COND_FALSE1358:%.*]] +// SIMD-ONLY0: cond.true1356: +// SIMD-ONLY0-NEXT: [[TMP560:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1357:%.*]] = zext i16 [[TMP560]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1360:%.*]] +// SIMD-ONLY0: cond.false1358: +// SIMD-ONLY0-NEXT: [[TMP561:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1359:%.*]] = zext i16 [[TMP561]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1360]] +// SIMD-ONLY0: cond.end1360: +// SIMD-ONLY0-NEXT: [[COND1361:%.*]] = phi i32 [ [[CONV1357]], [[COND_TRUE1356]] ], [ [[CONV1359]], [[COND_FALSE1358]] ] +// SIMD-ONLY0-NEXT: [[CONV1362:%.*]] = trunc i32 [[COND1361]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1362]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP562:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1363:%.*]] = zext i16 [[TMP562]] to i32 +// SIMD-ONLY0-NEXT: [[TMP563:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1364:%.*]] = zext i16 [[TMP563]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1365:%.*]] = icmp sgt i32 [[CONV1363]], [[CONV1364]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1365]], label [[IF_THEN1367:%.*]], label [[IF_END1368:%.*]] +// SIMD-ONLY0: if.then1367: 
+// SIMD-ONLY0-NEXT: [[TMP564:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP564]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1368]] +// SIMD-ONLY0: if.end1368: +// SIMD-ONLY0-NEXT: [[TMP565:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1369:%.*]] = zext i16 [[TMP565]] to i32 +// SIMD-ONLY0-NEXT: [[TMP566:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1370:%.*]] = zext i16 [[TMP566]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1371:%.*]] = icmp slt i32 [[CONV1369]], [[CONV1370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1371]], label [[IF_THEN1373:%.*]], label [[IF_END1374:%.*]] +// SIMD-ONLY0: if.then1373: +// SIMD-ONLY0-NEXT: [[TMP567:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP567]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1374]] +// SIMD-ONLY0: if.end1374: +// SIMD-ONLY0-NEXT: [[TMP568:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1375:%.*]] = zext i16 [[TMP568]] to i32 +// SIMD-ONLY0-NEXT: [[TMP569:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1376:%.*]] = zext i16 [[TMP569]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1377:%.*]] = icmp sgt i32 [[CONV1375]], [[CONV1376]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1377]], label [[IF_THEN1379:%.*]], label [[IF_END1380:%.*]] +// SIMD-ONLY0: if.then1379: +// SIMD-ONLY0-NEXT: [[TMP570:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP570]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1380]] +// SIMD-ONLY0: if.end1380: +// SIMD-ONLY0-NEXT: [[TMP571:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1381:%.*]] = zext i16 [[TMP571]] to i32 +// SIMD-ONLY0-NEXT: [[TMP572:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1382:%.*]] = zext i16 [[TMP572]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1383:%.*]] = icmp slt i32 [[CONV1381]], [[CONV1382]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1383]], label [[IF_THEN1385:%.*]], label 
[[IF_END1386:%.*]] +// SIMD-ONLY0: if.then1385: +// SIMD-ONLY0-NEXT: [[TMP573:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP573]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1386]] +// SIMD-ONLY0: if.end1386: +// SIMD-ONLY0-NEXT: [[TMP574:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1387:%.*]] = zext i16 [[TMP574]] to i32 +// SIMD-ONLY0-NEXT: [[TMP575:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1388:%.*]] = zext i16 [[TMP575]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1389:%.*]] = icmp eq i32 [[CONV1387]], [[CONV1388]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1389]], label [[COND_TRUE1391:%.*]], label [[COND_FALSE1393:%.*]] +// SIMD-ONLY0: cond.true1391: +// SIMD-ONLY0-NEXT: [[TMP576:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1392:%.*]] = zext i16 [[TMP576]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1395:%.*]] +// SIMD-ONLY0: cond.false1393: +// SIMD-ONLY0-NEXT: [[TMP577:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1394:%.*]] = zext i16 [[TMP577]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1395]] +// SIMD-ONLY0: cond.end1395: +// SIMD-ONLY0-NEXT: [[COND1396:%.*]] = phi i32 [ [[CONV1392]], [[COND_TRUE1391]] ], [ [[CONV1394]], [[COND_FALSE1393]] ] +// SIMD-ONLY0-NEXT: [[CONV1397:%.*]] = trunc i32 [[COND1396]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1397]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP578:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1398:%.*]] = zext i16 [[TMP578]] to i32 +// SIMD-ONLY0-NEXT: [[TMP579:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1399:%.*]] = zext i16 [[TMP579]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1400:%.*]] = icmp eq i32 [[CONV1398]], [[CONV1399]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1400]], label [[COND_TRUE1402:%.*]], label [[COND_FALSE1404:%.*]] +// SIMD-ONLY0: cond.true1402: +// SIMD-ONLY0-NEXT: [[TMP580:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1403:%.*]] = zext i16 [[TMP580]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1406:%.*]] +// SIMD-ONLY0: cond.false1404: +// SIMD-ONLY0-NEXT: [[TMP581:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1405:%.*]] = zext i16 [[TMP581]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1406]] +// SIMD-ONLY0: cond.end1406: +// SIMD-ONLY0-NEXT: [[COND1407:%.*]] = phi i32 [ [[CONV1403]], [[COND_TRUE1402]] ], [ [[CONV1405]], [[COND_FALSE1404]] ] +// SIMD-ONLY0-NEXT: [[CONV1408:%.*]] = trunc i32 [[COND1407]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1408]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP582:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1409:%.*]] = zext i16 [[TMP582]] to i32 +// SIMD-ONLY0-NEXT: [[TMP583:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1410:%.*]] = zext i16 [[TMP583]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1411:%.*]] = icmp eq i32 [[CONV1409]], [[CONV1410]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1411]], label [[IF_THEN1413:%.*]], label [[IF_END1414:%.*]] +// SIMD-ONLY0: if.then1413: +// SIMD-ONLY0-NEXT: [[TMP584:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP584]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1414]] +// SIMD-ONLY0: if.end1414: +// SIMD-ONLY0-NEXT: [[TMP585:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1415:%.*]] = zext i16 [[TMP585]] to i32 +// SIMD-ONLY0-NEXT: [[TMP586:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1416:%.*]] = zext i16 [[TMP586]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1417:%.*]] = icmp eq i32 [[CONV1415]], [[CONV1416]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1417]], label [[IF_THEN1419:%.*]], label [[IF_END1420:%.*]] +// SIMD-ONLY0: if.then1419: +// SIMD-ONLY0-NEXT: [[TMP587:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP587]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1420]] +// SIMD-ONLY0: if.end1420: +// SIMD-ONLY0-NEXT: [[TMP588:%.*]] = load 
i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1421:%.*]] = sext i16 [[TMP588]] to i32 +// SIMD-ONLY0-NEXT: [[TMP589:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1422:%.*]] = sext i16 [[TMP589]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1423:%.*]] = icmp sgt i32 [[CONV1421]], [[CONV1422]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1423]], label [[COND_TRUE1425:%.*]], label [[COND_FALSE1427:%.*]] +// SIMD-ONLY0: cond.true1425: +// SIMD-ONLY0-NEXT: [[TMP590:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1426:%.*]] = sext i16 [[TMP590]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1429:%.*]] +// SIMD-ONLY0: cond.false1427: +// SIMD-ONLY0-NEXT: [[TMP591:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1428:%.*]] = sext i16 [[TMP591]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1429]] +// SIMD-ONLY0: cond.end1429: +// SIMD-ONLY0-NEXT: [[COND1430:%.*]] = phi i32 [ [[CONV1426]], [[COND_TRUE1425]] ], [ [[CONV1428]], [[COND_FALSE1427]] ] +// SIMD-ONLY0-NEXT: [[CONV1431:%.*]] = trunc i32 [[COND1430]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1431]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP592:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1432:%.*]] = sext i16 [[TMP592]] to i32 +// SIMD-ONLY0-NEXT: [[TMP593:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1433:%.*]] = sext i16 [[TMP593]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1434:%.*]] = icmp slt i32 [[CONV1432]], [[CONV1433]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1434]], label [[COND_TRUE1436:%.*]], label [[COND_FALSE1438:%.*]] +// SIMD-ONLY0: cond.true1436: +// SIMD-ONLY0-NEXT: [[TMP594:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1437:%.*]] = sext i16 [[TMP594]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1440:%.*]] +// SIMD-ONLY0: cond.false1438: +// SIMD-ONLY0-NEXT: [[TMP595:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1439:%.*]] = sext i16 [[TMP595]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1440]] 
+// SIMD-ONLY0: cond.end1440: +// SIMD-ONLY0-NEXT: [[COND1441:%.*]] = phi i32 [ [[CONV1437]], [[COND_TRUE1436]] ], [ [[CONV1439]], [[COND_FALSE1438]] ] +// SIMD-ONLY0-NEXT: [[CONV1442:%.*]] = trunc i32 [[COND1441]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1442]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP596:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1443:%.*]] = sext i16 [[TMP596]] to i32 +// SIMD-ONLY0-NEXT: [[TMP597:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1444:%.*]] = sext i16 [[TMP597]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1445:%.*]] = icmp sgt i32 [[CONV1443]], [[CONV1444]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1445]], label [[COND_TRUE1447:%.*]], label [[COND_FALSE1449:%.*]] +// SIMD-ONLY0: cond.true1447: +// SIMD-ONLY0-NEXT: [[TMP598:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1448:%.*]] = sext i16 [[TMP598]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1451:%.*]] +// SIMD-ONLY0: cond.false1449: +// SIMD-ONLY0-NEXT: [[TMP599:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1450:%.*]] = sext i16 [[TMP599]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1451]] +// SIMD-ONLY0: cond.end1451: +// SIMD-ONLY0-NEXT: [[COND1452:%.*]] = phi i32 [ [[CONV1448]], [[COND_TRUE1447]] ], [ [[CONV1450]], [[COND_FALSE1449]] ] +// SIMD-ONLY0-NEXT: [[CONV1453:%.*]] = trunc i32 [[COND1452]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1453]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP600:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1454:%.*]] = sext i16 [[TMP600]] to i32 +// SIMD-ONLY0-NEXT: [[TMP601:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1455:%.*]] = sext i16 [[TMP601]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1456:%.*]] = icmp slt i32 [[CONV1454]], [[CONV1455]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1456]], label [[COND_TRUE1458:%.*]], label [[COND_FALSE1460:%.*]] +// SIMD-ONLY0: cond.true1458: +// SIMD-ONLY0-NEXT: [[TMP602:%.*]] = load i16, ptr [[SE]], align 2 
+// SIMD-ONLY0-NEXT: [[CONV1459:%.*]] = sext i16 [[TMP602]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1462:%.*]] +// SIMD-ONLY0: cond.false1460: +// SIMD-ONLY0-NEXT: [[TMP603:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1461:%.*]] = sext i16 [[TMP603]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1462]] +// SIMD-ONLY0: cond.end1462: +// SIMD-ONLY0-NEXT: [[COND1463:%.*]] = phi i32 [ [[CONV1459]], [[COND_TRUE1458]] ], [ [[CONV1461]], [[COND_FALSE1460]] ] +// SIMD-ONLY0-NEXT: [[CONV1464:%.*]] = trunc i32 [[COND1463]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1464]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP604:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1465:%.*]] = sext i16 [[TMP604]] to i32 +// SIMD-ONLY0-NEXT: [[TMP605:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1466:%.*]] = sext i16 [[TMP605]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1467:%.*]] = icmp sgt i32 [[CONV1465]], [[CONV1466]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1467]], label [[IF_THEN1469:%.*]], label [[IF_END1470:%.*]] +// SIMD-ONLY0: if.then1469: +// SIMD-ONLY0-NEXT: [[TMP606:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP606]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1470]] +// SIMD-ONLY0: if.end1470: +// SIMD-ONLY0-NEXT: [[TMP607:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1471:%.*]] = sext i16 [[TMP607]] to i32 +// SIMD-ONLY0-NEXT: [[TMP608:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1472:%.*]] = sext i16 [[TMP608]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1473:%.*]] = icmp slt i32 [[CONV1471]], [[CONV1472]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1473]], label [[IF_THEN1475:%.*]], label [[IF_END1476:%.*]] +// SIMD-ONLY0: if.then1475: +// SIMD-ONLY0-NEXT: [[TMP609:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP609]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1476]] +// SIMD-ONLY0: if.end1476: +// SIMD-ONLY0-NEXT: 
[[TMP610:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1477:%.*]] = sext i16 [[TMP610]] to i32 +// SIMD-ONLY0-NEXT: [[TMP611:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1478:%.*]] = sext i16 [[TMP611]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1479:%.*]] = icmp sgt i32 [[CONV1477]], [[CONV1478]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1479]], label [[IF_THEN1481:%.*]], label [[IF_END1482:%.*]] +// SIMD-ONLY0: if.then1481: +// SIMD-ONLY0-NEXT: [[TMP612:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP612]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1482]] +// SIMD-ONLY0: if.end1482: +// SIMD-ONLY0-NEXT: [[TMP613:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1483:%.*]] = sext i16 [[TMP613]] to i32 +// SIMD-ONLY0-NEXT: [[TMP614:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1484:%.*]] = sext i16 [[TMP614]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1485:%.*]] = icmp slt i32 [[CONV1483]], [[CONV1484]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1485]], label [[IF_THEN1487:%.*]], label [[IF_END1488:%.*]] +// SIMD-ONLY0: if.then1487: +// SIMD-ONLY0-NEXT: [[TMP615:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP615]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1488]] +// SIMD-ONLY0: if.end1488: +// SIMD-ONLY0-NEXT: [[TMP616:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1489:%.*]] = sext i16 [[TMP616]] to i32 +// SIMD-ONLY0-NEXT: [[TMP617:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1490:%.*]] = sext i16 [[TMP617]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1491:%.*]] = icmp eq i32 [[CONV1489]], [[CONV1490]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1491]], label [[COND_TRUE1493:%.*]], label [[COND_FALSE1495:%.*]] +// SIMD-ONLY0: cond.true1493: +// SIMD-ONLY0-NEXT: [[TMP618:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1494:%.*]] = sext i16 [[TMP618]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1497:%.*]] +// 
SIMD-ONLY0: cond.false1495: +// SIMD-ONLY0-NEXT: [[TMP619:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1496:%.*]] = sext i16 [[TMP619]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1497]] +// SIMD-ONLY0: cond.end1497: +// SIMD-ONLY0-NEXT: [[COND1498:%.*]] = phi i32 [ [[CONV1494]], [[COND_TRUE1493]] ], [ [[CONV1496]], [[COND_FALSE1495]] ] +// SIMD-ONLY0-NEXT: [[CONV1499:%.*]] = trunc i32 [[COND1498]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1499]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP620:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1500:%.*]] = sext i16 [[TMP620]] to i32 +// SIMD-ONLY0-NEXT: [[TMP621:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1501:%.*]] = sext i16 [[TMP621]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1502:%.*]] = icmp eq i32 [[CONV1500]], [[CONV1501]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1502]], label [[COND_TRUE1504:%.*]], label [[COND_FALSE1506:%.*]] +// SIMD-ONLY0: cond.true1504: +// SIMD-ONLY0-NEXT: [[TMP622:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1505:%.*]] = sext i16 [[TMP622]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1508:%.*]] +// SIMD-ONLY0: cond.false1506: +// SIMD-ONLY0-NEXT: [[TMP623:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1507:%.*]] = sext i16 [[TMP623]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1508]] +// SIMD-ONLY0: cond.end1508: +// SIMD-ONLY0-NEXT: [[COND1509:%.*]] = phi i32 [ [[CONV1505]], [[COND_TRUE1504]] ], [ [[CONV1507]], [[COND_FALSE1506]] ] +// SIMD-ONLY0-NEXT: [[CONV1510:%.*]] = trunc i32 [[COND1509]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1510]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP624:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1511:%.*]] = sext i16 [[TMP624]] to i32 +// SIMD-ONLY0-NEXT: [[TMP625:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1512:%.*]] = sext i16 [[TMP625]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1513:%.*]] = icmp eq i32 [[CONV1511]], 
[[CONV1512]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1513]], label [[IF_THEN1515:%.*]], label [[IF_END1516:%.*]] +// SIMD-ONLY0: if.then1515: +// SIMD-ONLY0-NEXT: [[TMP626:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP626]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1516]] +// SIMD-ONLY0: if.end1516: +// SIMD-ONLY0-NEXT: [[TMP627:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1517:%.*]] = sext i16 [[TMP627]] to i32 +// SIMD-ONLY0-NEXT: [[TMP628:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1518:%.*]] = sext i16 [[TMP628]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1519:%.*]] = icmp eq i32 [[CONV1517]], [[CONV1518]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1519]], label [[IF_THEN1521:%.*]], label [[IF_END1522:%.*]] +// SIMD-ONLY0: if.then1521: +// SIMD-ONLY0-NEXT: [[TMP629:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP629]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1522]] +// SIMD-ONLY0: if.end1522: +// SIMD-ONLY0-NEXT: [[TMP630:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1523:%.*]] = zext i16 [[TMP630]] to i32 +// SIMD-ONLY0-NEXT: [[TMP631:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1524:%.*]] = zext i16 [[TMP631]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1525:%.*]] = icmp sgt i32 [[CONV1523]], [[CONV1524]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1525]], label [[COND_TRUE1527:%.*]], label [[COND_FALSE1529:%.*]] +// SIMD-ONLY0: cond.true1527: +// SIMD-ONLY0-NEXT: [[TMP632:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1528:%.*]] = zext i16 [[TMP632]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1531:%.*]] +// SIMD-ONLY0: cond.false1529: +// SIMD-ONLY0-NEXT: [[TMP633:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1530:%.*]] = zext i16 [[TMP633]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1531]] +// SIMD-ONLY0: cond.end1531: +// SIMD-ONLY0-NEXT: [[COND1532:%.*]] = phi i32 [ [[CONV1528]], 
[[COND_TRUE1527]] ], [ [[CONV1530]], [[COND_FALSE1529]] ] +// SIMD-ONLY0-NEXT: [[CONV1533:%.*]] = trunc i32 [[COND1532]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1533]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP634:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1534:%.*]] = zext i16 [[TMP634]] to i32 +// SIMD-ONLY0-NEXT: [[TMP635:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1535:%.*]] = zext i16 [[TMP635]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1536:%.*]] = icmp slt i32 [[CONV1534]], [[CONV1535]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1536]], label [[COND_TRUE1538:%.*]], label [[COND_FALSE1540:%.*]] +// SIMD-ONLY0: cond.true1538: +// SIMD-ONLY0-NEXT: [[TMP636:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1539:%.*]] = zext i16 [[TMP636]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1542:%.*]] +// SIMD-ONLY0: cond.false1540: +// SIMD-ONLY0-NEXT: [[TMP637:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1541:%.*]] = zext i16 [[TMP637]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1542]] +// SIMD-ONLY0: cond.end1542: +// SIMD-ONLY0-NEXT: [[COND1543:%.*]] = phi i32 [ [[CONV1539]], [[COND_TRUE1538]] ], [ [[CONV1541]], [[COND_FALSE1540]] ] +// SIMD-ONLY0-NEXT: [[CONV1544:%.*]] = trunc i32 [[COND1543]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1544]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP638:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1545:%.*]] = zext i16 [[TMP638]] to i32 +// SIMD-ONLY0-NEXT: [[TMP639:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1546:%.*]] = zext i16 [[TMP639]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1547:%.*]] = icmp sgt i32 [[CONV1545]], [[CONV1546]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1547]], label [[COND_TRUE1549:%.*]], label [[COND_FALSE1551:%.*]] +// SIMD-ONLY0: cond.true1549: +// SIMD-ONLY0-NEXT: [[TMP640:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1550:%.*]] = zext i16 [[TMP640]] to i32 +// 
SIMD-ONLY0-NEXT: br label [[COND_END1553:%.*]] +// SIMD-ONLY0: cond.false1551: +// SIMD-ONLY0-NEXT: [[TMP641:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1552:%.*]] = zext i16 [[TMP641]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1553]] +// SIMD-ONLY0: cond.end1553: +// SIMD-ONLY0-NEXT: [[COND1554:%.*]] = phi i32 [ [[CONV1550]], [[COND_TRUE1549]] ], [ [[CONV1552]], [[COND_FALSE1551]] ] +// SIMD-ONLY0-NEXT: [[CONV1555:%.*]] = trunc i32 [[COND1554]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1555]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP642:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1556:%.*]] = zext i16 [[TMP642]] to i32 +// SIMD-ONLY0-NEXT: [[TMP643:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1557:%.*]] = zext i16 [[TMP643]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1558:%.*]] = icmp slt i32 [[CONV1556]], [[CONV1557]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1558]], label [[COND_TRUE1560:%.*]], label [[COND_FALSE1562:%.*]] +// SIMD-ONLY0: cond.true1560: +// SIMD-ONLY0-NEXT: [[TMP644:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1561:%.*]] = zext i16 [[TMP644]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1564:%.*]] +// SIMD-ONLY0: cond.false1562: +// SIMD-ONLY0-NEXT: [[TMP645:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1563:%.*]] = zext i16 [[TMP645]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1564]] +// SIMD-ONLY0: cond.end1564: +// SIMD-ONLY0-NEXT: [[COND1565:%.*]] = phi i32 [ [[CONV1561]], [[COND_TRUE1560]] ], [ [[CONV1563]], [[COND_FALSE1562]] ] +// SIMD-ONLY0-NEXT: [[CONV1566:%.*]] = trunc i32 [[COND1565]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1566]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP646:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1567:%.*]] = zext i16 [[TMP646]] to i32 +// SIMD-ONLY0-NEXT: [[TMP647:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1568:%.*]] = zext i16 [[TMP647]] to i32 +// 
SIMD-ONLY0-NEXT: [[CMP1569:%.*]] = icmp sgt i32 [[CONV1567]], [[CONV1568]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1569]], label [[IF_THEN1571:%.*]], label [[IF_END1572:%.*]] +// SIMD-ONLY0: if.then1571: +// SIMD-ONLY0-NEXT: [[TMP648:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP648]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1572]] +// SIMD-ONLY0: if.end1572: +// SIMD-ONLY0-NEXT: [[TMP649:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1573:%.*]] = zext i16 [[TMP649]] to i32 +// SIMD-ONLY0-NEXT: [[TMP650:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1574:%.*]] = zext i16 [[TMP650]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1575:%.*]] = icmp slt i32 [[CONV1573]], [[CONV1574]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1575]], label [[IF_THEN1577:%.*]], label [[IF_END1578:%.*]] +// SIMD-ONLY0: if.then1577: +// SIMD-ONLY0-NEXT: [[TMP651:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP651]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1578]] +// SIMD-ONLY0: if.end1578: +// SIMD-ONLY0-NEXT: [[TMP652:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1579:%.*]] = zext i16 [[TMP652]] to i32 +// SIMD-ONLY0-NEXT: [[TMP653:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1580:%.*]] = zext i16 [[TMP653]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1581:%.*]] = icmp sgt i32 [[CONV1579]], [[CONV1580]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1581]], label [[IF_THEN1583:%.*]], label [[IF_END1584:%.*]] +// SIMD-ONLY0: if.then1583: +// SIMD-ONLY0-NEXT: [[TMP654:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP654]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1584]] +// SIMD-ONLY0: if.end1584: +// SIMD-ONLY0-NEXT: [[TMP655:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1585:%.*]] = zext i16 [[TMP655]] to i32 +// SIMD-ONLY0-NEXT: [[TMP656:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1586:%.*]] = zext i16 [[TMP656]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1587:%.*]] = icmp slt i32 [[CONV1585]], [[CONV1586]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1587]], label [[IF_THEN1589:%.*]], label [[IF_END1590:%.*]] +// SIMD-ONLY0: if.then1589: +// SIMD-ONLY0-NEXT: [[TMP657:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP657]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1590]] +// SIMD-ONLY0: if.end1590: +// SIMD-ONLY0-NEXT: [[TMP658:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1591:%.*]] = zext i16 [[TMP658]] to i32 +// SIMD-ONLY0-NEXT: [[TMP659:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1592:%.*]] = zext i16 [[TMP659]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1593:%.*]] = icmp eq i32 [[CONV1591]], [[CONV1592]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1593]], label [[COND_TRUE1595:%.*]], label [[COND_FALSE1597:%.*]] +// SIMD-ONLY0: cond.true1595: +// SIMD-ONLY0-NEXT: [[TMP660:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1596:%.*]] = zext i16 [[TMP660]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1599:%.*]] +// SIMD-ONLY0: cond.false1597: +// SIMD-ONLY0-NEXT: [[TMP661:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1598:%.*]] = zext i16 [[TMP661]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1599]] +// SIMD-ONLY0: cond.end1599: +// SIMD-ONLY0-NEXT: [[COND1600:%.*]] = phi i32 [ [[CONV1596]], [[COND_TRUE1595]] ], [ [[CONV1598]], [[COND_FALSE1597]] ] +// SIMD-ONLY0-NEXT: [[CONV1601:%.*]] = trunc i32 [[COND1600]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1601]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP662:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1602:%.*]] = zext i16 [[TMP662]] to i32 +// SIMD-ONLY0-NEXT: [[TMP663:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1603:%.*]] = zext i16 [[TMP663]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1604:%.*]] = icmp eq i32 [[CONV1602]], [[CONV1603]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP1604]], label [[COND_TRUE1606:%.*]], label [[COND_FALSE1608:%.*]] +// SIMD-ONLY0: cond.true1606: +// SIMD-ONLY0-NEXT: [[TMP664:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1607:%.*]] = zext i16 [[TMP664]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1610:%.*]] +// SIMD-ONLY0: cond.false1608: +// SIMD-ONLY0-NEXT: [[TMP665:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1609:%.*]] = zext i16 [[TMP665]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1610]] +// SIMD-ONLY0: cond.end1610: +// SIMD-ONLY0-NEXT: [[COND1611:%.*]] = phi i32 [ [[CONV1607]], [[COND_TRUE1606]] ], [ [[CONV1609]], [[COND_FALSE1608]] ] +// SIMD-ONLY0-NEXT: [[CONV1612:%.*]] = trunc i32 [[COND1611]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1612]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP666:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1613:%.*]] = zext i16 [[TMP666]] to i32 +// SIMD-ONLY0-NEXT: [[TMP667:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1614:%.*]] = zext i16 [[TMP667]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1615:%.*]] = icmp eq i32 [[CONV1613]], [[CONV1614]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1615]], label [[IF_THEN1617:%.*]], label [[IF_END1618:%.*]] +// SIMD-ONLY0: if.then1617: +// SIMD-ONLY0-NEXT: [[TMP668:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP668]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1618]] +// SIMD-ONLY0: if.end1618: +// SIMD-ONLY0-NEXT: [[TMP669:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1619:%.*]] = zext i16 [[TMP669]] to i32 +// SIMD-ONLY0-NEXT: [[TMP670:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1620:%.*]] = zext i16 [[TMP670]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1621:%.*]] = icmp eq i32 [[CONV1619]], [[CONV1620]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1621]], label [[IF_THEN1623:%.*]], label [[IF_END1624:%.*]] +// SIMD-ONLY0: if.then1623: +// SIMD-ONLY0-NEXT: [[TMP671:%.*]] = load i16, ptr 
[[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP671]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1624]] +// SIMD-ONLY0: if.end1624: +// SIMD-ONLY0-NEXT: [[TMP672:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1625:%.*]] = sext i16 [[TMP672]] to i32 +// SIMD-ONLY0-NEXT: [[TMP673:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1626:%.*]] = sext i16 [[TMP673]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1627:%.*]] = icmp sgt i32 [[CONV1625]], [[CONV1626]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1627]], label [[COND_TRUE1629:%.*]], label [[COND_FALSE1631:%.*]] +// SIMD-ONLY0: cond.true1629: +// SIMD-ONLY0-NEXT: [[TMP674:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1630:%.*]] = sext i16 [[TMP674]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1633:%.*]] +// SIMD-ONLY0: cond.false1631: +// SIMD-ONLY0-NEXT: [[TMP675:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1632:%.*]] = sext i16 [[TMP675]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1633]] +// SIMD-ONLY0: cond.end1633: +// SIMD-ONLY0-NEXT: [[COND1634:%.*]] = phi i32 [ [[CONV1630]], [[COND_TRUE1629]] ], [ [[CONV1632]], [[COND_FALSE1631]] ] +// SIMD-ONLY0-NEXT: [[CONV1635:%.*]] = trunc i32 [[COND1634]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1635]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP676:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1636:%.*]] = sext i16 [[TMP676]] to i32 +// SIMD-ONLY0-NEXT: [[TMP677:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1637:%.*]] = sext i16 [[TMP677]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1638:%.*]] = icmp slt i32 [[CONV1636]], [[CONV1637]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1638]], label [[COND_TRUE1640:%.*]], label [[COND_FALSE1642:%.*]] +// SIMD-ONLY0: cond.true1640: +// SIMD-ONLY0-NEXT: [[TMP678:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1641:%.*]] = sext i16 [[TMP678]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1644:%.*]] +// SIMD-ONLY0: 
cond.false1642: +// SIMD-ONLY0-NEXT: [[TMP679:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1643:%.*]] = sext i16 [[TMP679]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1644]] +// SIMD-ONLY0: cond.end1644: +// SIMD-ONLY0-NEXT: [[COND1645:%.*]] = phi i32 [ [[CONV1641]], [[COND_TRUE1640]] ], [ [[CONV1643]], [[COND_FALSE1642]] ] +// SIMD-ONLY0-NEXT: [[CONV1646:%.*]] = trunc i32 [[COND1645]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1646]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP680:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1647:%.*]] = sext i16 [[TMP680]] to i32 +// SIMD-ONLY0-NEXT: [[TMP681:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1648:%.*]] = sext i16 [[TMP681]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1649:%.*]] = icmp sgt i32 [[CONV1647]], [[CONV1648]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1649]], label [[COND_TRUE1651:%.*]], label [[COND_FALSE1653:%.*]] +// SIMD-ONLY0: cond.true1651: +// SIMD-ONLY0-NEXT: [[TMP682:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1652:%.*]] = sext i16 [[TMP682]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1655:%.*]] +// SIMD-ONLY0: cond.false1653: +// SIMD-ONLY0-NEXT: [[TMP683:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1654:%.*]] = sext i16 [[TMP683]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1655]] +// SIMD-ONLY0: cond.end1655: +// SIMD-ONLY0-NEXT: [[COND1656:%.*]] = phi i32 [ [[CONV1652]], [[COND_TRUE1651]] ], [ [[CONV1654]], [[COND_FALSE1653]] ] +// SIMD-ONLY0-NEXT: [[CONV1657:%.*]] = trunc i32 [[COND1656]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1657]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP684:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1658:%.*]] = sext i16 [[TMP684]] to i32 +// SIMD-ONLY0-NEXT: [[TMP685:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1659:%.*]] = sext i16 [[TMP685]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1660:%.*]] = icmp slt i32 [[CONV1658]], [[CONV1659]] 
+// SIMD-ONLY0-NEXT: br i1 [[CMP1660]], label [[COND_TRUE1662:%.*]], label [[COND_FALSE1664:%.*]] +// SIMD-ONLY0: cond.true1662: +// SIMD-ONLY0-NEXT: [[TMP686:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1663:%.*]] = sext i16 [[TMP686]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1666:%.*]] +// SIMD-ONLY0: cond.false1664: +// SIMD-ONLY0-NEXT: [[TMP687:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1665:%.*]] = sext i16 [[TMP687]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1666]] +// SIMD-ONLY0: cond.end1666: +// SIMD-ONLY0-NEXT: [[COND1667:%.*]] = phi i32 [ [[CONV1663]], [[COND_TRUE1662]] ], [ [[CONV1665]], [[COND_FALSE1664]] ] +// SIMD-ONLY0-NEXT: [[CONV1668:%.*]] = trunc i32 [[COND1667]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1668]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP688:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1669:%.*]] = sext i16 [[TMP688]] to i32 +// SIMD-ONLY0-NEXT: [[TMP689:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1670:%.*]] = sext i16 [[TMP689]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1671:%.*]] = icmp sgt i32 [[CONV1669]], [[CONV1670]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1671]], label [[IF_THEN1673:%.*]], label [[IF_END1674:%.*]] +// SIMD-ONLY0: if.then1673: +// SIMD-ONLY0-NEXT: [[TMP690:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP690]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1674]] +// SIMD-ONLY0: if.end1674: +// SIMD-ONLY0-NEXT: [[TMP691:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1675:%.*]] = sext i16 [[TMP691]] to i32 +// SIMD-ONLY0-NEXT: [[TMP692:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1676:%.*]] = sext i16 [[TMP692]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1677:%.*]] = icmp slt i32 [[CONV1675]], [[CONV1676]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1677]], label [[IF_THEN1679:%.*]], label [[IF_END1680:%.*]] +// SIMD-ONLY0: if.then1679: +// SIMD-ONLY0-NEXT: [[TMP693:%.*]] = 
load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP693]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1680]] +// SIMD-ONLY0: if.end1680: +// SIMD-ONLY0-NEXT: [[TMP694:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1681:%.*]] = sext i16 [[TMP694]] to i32 +// SIMD-ONLY0-NEXT: [[TMP695:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1682:%.*]] = sext i16 [[TMP695]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1683:%.*]] = icmp sgt i32 [[CONV1681]], [[CONV1682]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1683]], label [[IF_THEN1685:%.*]], label [[IF_END1686:%.*]] +// SIMD-ONLY0: if.then1685: +// SIMD-ONLY0-NEXT: [[TMP696:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP696]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1686]] +// SIMD-ONLY0: if.end1686: +// SIMD-ONLY0-NEXT: [[TMP697:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1687:%.*]] = sext i16 [[TMP697]] to i32 +// SIMD-ONLY0-NEXT: [[TMP698:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1688:%.*]] = sext i16 [[TMP698]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1689:%.*]] = icmp slt i32 [[CONV1687]], [[CONV1688]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1689]], label [[IF_THEN1691:%.*]], label [[IF_END1692:%.*]] +// SIMD-ONLY0: if.then1691: +// SIMD-ONLY0-NEXT: [[TMP699:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP699]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1692]] +// SIMD-ONLY0: if.end1692: +// SIMD-ONLY0-NEXT: [[TMP700:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1693:%.*]] = sext i16 [[TMP700]] to i32 +// SIMD-ONLY0-NEXT: [[TMP701:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1694:%.*]] = sext i16 [[TMP701]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1695:%.*]] = icmp eq i32 [[CONV1693]], [[CONV1694]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1695]], label [[COND_TRUE1697:%.*]], label [[COND_FALSE1699:%.*]] +// SIMD-ONLY0: cond.true1697: +// 
SIMD-ONLY0-NEXT: [[TMP702:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1698:%.*]] = sext i16 [[TMP702]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1701:%.*]] +// SIMD-ONLY0: cond.false1699: +// SIMD-ONLY0-NEXT: [[TMP703:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1700:%.*]] = sext i16 [[TMP703]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1701]] +// SIMD-ONLY0: cond.end1701: +// SIMD-ONLY0-NEXT: [[COND1702:%.*]] = phi i32 [ [[CONV1698]], [[COND_TRUE1697]] ], [ [[CONV1700]], [[COND_FALSE1699]] ] +// SIMD-ONLY0-NEXT: [[CONV1703:%.*]] = trunc i32 [[COND1702]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1703]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP704:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1704:%.*]] = sext i16 [[TMP704]] to i32 +// SIMD-ONLY0-NEXT: [[TMP705:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1705:%.*]] = sext i16 [[TMP705]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1706:%.*]] = icmp eq i32 [[CONV1704]], [[CONV1705]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1706]], label [[COND_TRUE1708:%.*]], label [[COND_FALSE1710:%.*]] +// SIMD-ONLY0: cond.true1708: +// SIMD-ONLY0-NEXT: [[TMP706:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1709:%.*]] = sext i16 [[TMP706]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1712:%.*]] +// SIMD-ONLY0: cond.false1710: +// SIMD-ONLY0-NEXT: [[TMP707:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1711:%.*]] = sext i16 [[TMP707]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1712]] +// SIMD-ONLY0: cond.end1712: +// SIMD-ONLY0-NEXT: [[COND1713:%.*]] = phi i32 [ [[CONV1709]], [[COND_TRUE1708]] ], [ [[CONV1711]], [[COND_FALSE1710]] ] +// SIMD-ONLY0-NEXT: [[CONV1714:%.*]] = trunc i32 [[COND1713]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1714]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP708:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1715:%.*]] = sext i16 [[TMP708]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP709:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1716:%.*]] = sext i16 [[TMP709]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1717:%.*]] = icmp eq i32 [[CONV1715]], [[CONV1716]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1717]], label [[IF_THEN1719:%.*]], label [[IF_END1720:%.*]] +// SIMD-ONLY0: if.then1719: +// SIMD-ONLY0-NEXT: [[TMP710:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP710]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1720]] +// SIMD-ONLY0: if.end1720: +// SIMD-ONLY0-NEXT: [[TMP711:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1721:%.*]] = sext i16 [[TMP711]] to i32 +// SIMD-ONLY0-NEXT: [[TMP712:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1722:%.*]] = sext i16 [[TMP712]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1723:%.*]] = icmp eq i32 [[CONV1721]], [[CONV1722]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1723]], label [[IF_THEN1725:%.*]], label [[IF_END1726:%.*]] +// SIMD-ONLY0: if.then1725: +// SIMD-ONLY0-NEXT: [[TMP713:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP713]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1726]] +// SIMD-ONLY0: if.end1726: +// SIMD-ONLY0-NEXT: [[TMP714:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1727:%.*]] = zext i16 [[TMP714]] to i32 +// SIMD-ONLY0-NEXT: [[TMP715:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1728:%.*]] = zext i16 [[TMP715]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1729:%.*]] = icmp sgt i32 [[CONV1727]], [[CONV1728]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1729]], label [[COND_TRUE1731:%.*]], label [[COND_FALSE1733:%.*]] +// SIMD-ONLY0: cond.true1731: +// SIMD-ONLY0-NEXT: [[TMP716:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1732:%.*]] = zext i16 [[TMP716]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1735:%.*]] +// SIMD-ONLY0: cond.false1733: +// SIMD-ONLY0-NEXT: [[TMP717:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1734:%.*]] = zext i16 [[TMP717]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1735]] +// SIMD-ONLY0: cond.end1735: +// SIMD-ONLY0-NEXT: [[COND1736:%.*]] = phi i32 [ [[CONV1732]], [[COND_TRUE1731]] ], [ [[CONV1734]], [[COND_FALSE1733]] ] +// SIMD-ONLY0-NEXT: [[CONV1737:%.*]] = trunc i32 [[COND1736]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1737]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP718:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1738:%.*]] = zext i16 [[TMP718]] to i32 +// SIMD-ONLY0-NEXT: [[TMP719:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1739:%.*]] = zext i16 [[TMP719]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1740:%.*]] = icmp slt i32 [[CONV1738]], [[CONV1739]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1740]], label [[COND_TRUE1742:%.*]], label [[COND_FALSE1744:%.*]] +// SIMD-ONLY0: cond.true1742: +// SIMD-ONLY0-NEXT: [[TMP720:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1743:%.*]] = zext i16 [[TMP720]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1746:%.*]] +// SIMD-ONLY0: cond.false1744: +// SIMD-ONLY0-NEXT: [[TMP721:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1745:%.*]] = zext i16 [[TMP721]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1746]] +// SIMD-ONLY0: cond.end1746: +// SIMD-ONLY0-NEXT: [[COND1747:%.*]] = phi i32 [ [[CONV1743]], [[COND_TRUE1742]] ], [ [[CONV1745]], [[COND_FALSE1744]] ] +// SIMD-ONLY0-NEXT: [[CONV1748:%.*]] = trunc i32 [[COND1747]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1748]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP722:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1749:%.*]] = zext i16 [[TMP722]] to i32 +// SIMD-ONLY0-NEXT: [[TMP723:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1750:%.*]] = zext i16 [[TMP723]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1751:%.*]] = icmp sgt i32 [[CONV1749]], [[CONV1750]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1751]], label [[COND_TRUE1753:%.*]], label [[COND_FALSE1755:%.*]] 
+// SIMD-ONLY0: cond.true1753: +// SIMD-ONLY0-NEXT: [[TMP724:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1754:%.*]] = zext i16 [[TMP724]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1757:%.*]] +// SIMD-ONLY0: cond.false1755: +// SIMD-ONLY0-NEXT: [[TMP725:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1756:%.*]] = zext i16 [[TMP725]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1757]] +// SIMD-ONLY0: cond.end1757: +// SIMD-ONLY0-NEXT: [[COND1758:%.*]] = phi i32 [ [[CONV1754]], [[COND_TRUE1753]] ], [ [[CONV1756]], [[COND_FALSE1755]] ] +// SIMD-ONLY0-NEXT: [[CONV1759:%.*]] = trunc i32 [[COND1758]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1759]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP726:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1760:%.*]] = zext i16 [[TMP726]] to i32 +// SIMD-ONLY0-NEXT: [[TMP727:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1761:%.*]] = zext i16 [[TMP727]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1762:%.*]] = icmp slt i32 [[CONV1760]], [[CONV1761]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1762]], label [[COND_TRUE1764:%.*]], label [[COND_FALSE1766:%.*]] +// SIMD-ONLY0: cond.true1764: +// SIMD-ONLY0-NEXT: [[TMP728:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1765:%.*]] = zext i16 [[TMP728]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1768:%.*]] +// SIMD-ONLY0: cond.false1766: +// SIMD-ONLY0-NEXT: [[TMP729:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1767:%.*]] = zext i16 [[TMP729]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1768]] +// SIMD-ONLY0: cond.end1768: +// SIMD-ONLY0-NEXT: [[COND1769:%.*]] = phi i32 [ [[CONV1765]], [[COND_TRUE1764]] ], [ [[CONV1767]], [[COND_FALSE1766]] ] +// SIMD-ONLY0-NEXT: [[CONV1770:%.*]] = trunc i32 [[COND1769]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1770]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP730:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1771:%.*]] = 
zext i16 [[TMP730]] to i32 +// SIMD-ONLY0-NEXT: [[TMP731:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1772:%.*]] = zext i16 [[TMP731]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1773:%.*]] = icmp sgt i32 [[CONV1771]], [[CONV1772]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1773]], label [[IF_THEN1775:%.*]], label [[IF_END1776:%.*]] +// SIMD-ONLY0: if.then1775: +// SIMD-ONLY0-NEXT: [[TMP732:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP732]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1776]] +// SIMD-ONLY0: if.end1776: +// SIMD-ONLY0-NEXT: [[TMP733:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1777:%.*]] = zext i16 [[TMP733]] to i32 +// SIMD-ONLY0-NEXT: [[TMP734:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1778:%.*]] = zext i16 [[TMP734]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1779:%.*]] = icmp slt i32 [[CONV1777]], [[CONV1778]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1779]], label [[IF_THEN1781:%.*]], label [[IF_END1782:%.*]] +// SIMD-ONLY0: if.then1781: +// SIMD-ONLY0-NEXT: [[TMP735:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP735]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1782]] +// SIMD-ONLY0: if.end1782: +// SIMD-ONLY0-NEXT: [[TMP736:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1783:%.*]] = zext i16 [[TMP736]] to i32 +// SIMD-ONLY0-NEXT: [[TMP737:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1784:%.*]] = zext i16 [[TMP737]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1785:%.*]] = icmp sgt i32 [[CONV1783]], [[CONV1784]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1785]], label [[IF_THEN1787:%.*]], label [[IF_END1788:%.*]] +// SIMD-ONLY0: if.then1787: +// SIMD-ONLY0-NEXT: [[TMP738:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP738]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1788]] +// SIMD-ONLY0: if.end1788: +// SIMD-ONLY0-NEXT: [[TMP739:%.*]] = load i16, ptr [[USE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV1789:%.*]] = zext i16 [[TMP739]] to i32 +// SIMD-ONLY0-NEXT: [[TMP740:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1790:%.*]] = zext i16 [[TMP740]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1791:%.*]] = icmp slt i32 [[CONV1789]], [[CONV1790]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1791]], label [[IF_THEN1793:%.*]], label [[IF_END1794:%.*]] +// SIMD-ONLY0: if.then1793: +// SIMD-ONLY0-NEXT: [[TMP741:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP741]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1794]] +// SIMD-ONLY0: if.end1794: +// SIMD-ONLY0-NEXT: [[TMP742:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1795:%.*]] = zext i16 [[TMP742]] to i32 +// SIMD-ONLY0-NEXT: [[TMP743:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1796:%.*]] = zext i16 [[TMP743]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1797:%.*]] = icmp eq i32 [[CONV1795]], [[CONV1796]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1797]], label [[COND_TRUE1799:%.*]], label [[COND_FALSE1801:%.*]] +// SIMD-ONLY0: cond.true1799: +// SIMD-ONLY0-NEXT: [[TMP744:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1800:%.*]] = zext i16 [[TMP744]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1803:%.*]] +// SIMD-ONLY0: cond.false1801: +// SIMD-ONLY0-NEXT: [[TMP745:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1802:%.*]] = zext i16 [[TMP745]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1803]] +// SIMD-ONLY0: cond.end1803: +// SIMD-ONLY0-NEXT: [[COND1804:%.*]] = phi i32 [ [[CONV1800]], [[COND_TRUE1799]] ], [ [[CONV1802]], [[COND_FALSE1801]] ] +// SIMD-ONLY0-NEXT: [[CONV1805:%.*]] = trunc i32 [[COND1804]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1805]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP746:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1806:%.*]] = zext i16 [[TMP746]] to i32 +// SIMD-ONLY0-NEXT: [[TMP747:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1807:%.*]] = zext i16 [[TMP747]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1808:%.*]] = icmp eq i32 [[CONV1806]], [[CONV1807]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1808]], label [[COND_TRUE1810:%.*]], label [[COND_FALSE1812:%.*]] +// SIMD-ONLY0: cond.true1810: +// SIMD-ONLY0-NEXT: [[TMP748:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1811:%.*]] = zext i16 [[TMP748]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1814:%.*]] +// SIMD-ONLY0: cond.false1812: +// SIMD-ONLY0-NEXT: [[TMP749:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1813:%.*]] = zext i16 [[TMP749]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1814]] +// SIMD-ONLY0: cond.end1814: +// SIMD-ONLY0-NEXT: [[COND1815:%.*]] = phi i32 [ [[CONV1811]], [[COND_TRUE1810]] ], [ [[CONV1813]], [[COND_FALSE1812]] ] +// SIMD-ONLY0-NEXT: [[CONV1816:%.*]] = trunc i32 [[COND1815]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1816]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP750:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1817:%.*]] = zext i16 [[TMP750]] to i32 +// SIMD-ONLY0-NEXT: [[TMP751:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1818:%.*]] = zext i16 [[TMP751]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1819:%.*]] = icmp eq i32 [[CONV1817]], [[CONV1818]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1819]], label [[IF_THEN1821:%.*]], label [[IF_END1822:%.*]] +// SIMD-ONLY0: if.then1821: +// SIMD-ONLY0-NEXT: [[TMP752:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP752]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1822]] +// SIMD-ONLY0: if.end1822: +// SIMD-ONLY0-NEXT: [[TMP753:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1823:%.*]] = zext i16 [[TMP753]] to i32 +// SIMD-ONLY0-NEXT: [[TMP754:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1824:%.*]] = zext i16 [[TMP754]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1825:%.*]] = icmp eq i32 [[CONV1823]], [[CONV1824]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP1825]], label [[IF_THEN1827:%.*]], label [[IF_END1828:%.*]] +// SIMD-ONLY0: if.then1827: +// SIMD-ONLY0-NEXT: [[TMP755:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP755]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1828]] +// SIMD-ONLY0: if.end1828: +// SIMD-ONLY0-NEXT: [[TMP756:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1829:%.*]] = sext i16 [[TMP756]] to i32 +// SIMD-ONLY0-NEXT: [[TMP757:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1830:%.*]] = sext i16 [[TMP757]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1831:%.*]] = icmp sgt i32 [[CONV1829]], [[CONV1830]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1831]], label [[COND_TRUE1833:%.*]], label [[COND_FALSE1835:%.*]] +// SIMD-ONLY0: cond.true1833: +// SIMD-ONLY0-NEXT: [[TMP758:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1834:%.*]] = sext i16 [[TMP758]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1837:%.*]] +// SIMD-ONLY0: cond.false1835: +// SIMD-ONLY0-NEXT: [[TMP759:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1836:%.*]] = sext i16 [[TMP759]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1837]] +// SIMD-ONLY0: cond.end1837: +// SIMD-ONLY0-NEXT: [[COND1838:%.*]] = phi i32 [ [[CONV1834]], [[COND_TRUE1833]] ], [ [[CONV1836]], [[COND_FALSE1835]] ] +// SIMD-ONLY0-NEXT: [[CONV1839:%.*]] = trunc i32 [[COND1838]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1839]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP760:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1840:%.*]] = sext i16 [[TMP760]] to i32 +// SIMD-ONLY0-NEXT: [[TMP761:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1841:%.*]] = sext i16 [[TMP761]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1842:%.*]] = icmp slt i32 [[CONV1840]], [[CONV1841]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1842]], label [[COND_TRUE1844:%.*]], label [[COND_FALSE1846:%.*]] +// SIMD-ONLY0: cond.true1844: +// SIMD-ONLY0-NEXT: [[TMP762:%.*]] = load i16, ptr 
[[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1845:%.*]] = sext i16 [[TMP762]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1848:%.*]] +// SIMD-ONLY0: cond.false1846: +// SIMD-ONLY0-NEXT: [[TMP763:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1847:%.*]] = sext i16 [[TMP763]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1848]] +// SIMD-ONLY0: cond.end1848: +// SIMD-ONLY0-NEXT: [[COND1849:%.*]] = phi i32 [ [[CONV1845]], [[COND_TRUE1844]] ], [ [[CONV1847]], [[COND_FALSE1846]] ] +// SIMD-ONLY0-NEXT: [[CONV1850:%.*]] = trunc i32 [[COND1849]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1850]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP764:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1851:%.*]] = sext i16 [[TMP764]] to i32 +// SIMD-ONLY0-NEXT: [[TMP765:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1852:%.*]] = sext i16 [[TMP765]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1853:%.*]] = icmp sgt i32 [[CONV1851]], [[CONV1852]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1853]], label [[COND_TRUE1855:%.*]], label [[COND_FALSE1857:%.*]] +// SIMD-ONLY0: cond.true1855: +// SIMD-ONLY0-NEXT: [[TMP766:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1856:%.*]] = sext i16 [[TMP766]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1859:%.*]] +// SIMD-ONLY0: cond.false1857: +// SIMD-ONLY0-NEXT: [[TMP767:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1858:%.*]] = sext i16 [[TMP767]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1859]] +// SIMD-ONLY0: cond.end1859: +// SIMD-ONLY0-NEXT: [[COND1860:%.*]] = phi i32 [ [[CONV1856]], [[COND_TRUE1855]] ], [ [[CONV1858]], [[COND_FALSE1857]] ] +// SIMD-ONLY0-NEXT: [[CONV1861:%.*]] = trunc i32 [[COND1860]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1861]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP768:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1862:%.*]] = sext i16 [[TMP768]] to i32 +// SIMD-ONLY0-NEXT: [[TMP769:%.*]] = load i16, ptr [[SX]], 
align 2 +// SIMD-ONLY0-NEXT: [[CONV1863:%.*]] = sext i16 [[TMP769]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1864:%.*]] = icmp slt i32 [[CONV1862]], [[CONV1863]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1864]], label [[COND_TRUE1866:%.*]], label [[COND_FALSE1868:%.*]] +// SIMD-ONLY0: cond.true1866: +// SIMD-ONLY0-NEXT: [[TMP770:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1867:%.*]] = sext i16 [[TMP770]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1870:%.*]] +// SIMD-ONLY0: cond.false1868: +// SIMD-ONLY0-NEXT: [[TMP771:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1869:%.*]] = sext i16 [[TMP771]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1870]] +// SIMD-ONLY0: cond.end1870: +// SIMD-ONLY0-NEXT: [[COND1871:%.*]] = phi i32 [ [[CONV1867]], [[COND_TRUE1866]] ], [ [[CONV1869]], [[COND_FALSE1868]] ] +// SIMD-ONLY0-NEXT: [[CONV1872:%.*]] = trunc i32 [[COND1871]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1872]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP772:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1873:%.*]] = sext i16 [[TMP772]] to i32 +// SIMD-ONLY0-NEXT: [[TMP773:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1874:%.*]] = sext i16 [[TMP773]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1875:%.*]] = icmp sgt i32 [[CONV1873]], [[CONV1874]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1875]], label [[IF_THEN1877:%.*]], label [[IF_END1878:%.*]] +// SIMD-ONLY0: if.then1877: +// SIMD-ONLY0-NEXT: [[TMP774:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP774]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1878]] +// SIMD-ONLY0: if.end1878: +// SIMD-ONLY0-NEXT: [[TMP775:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1879:%.*]] = sext i16 [[TMP775]] to i32 +// SIMD-ONLY0-NEXT: [[TMP776:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1880:%.*]] = sext i16 [[TMP776]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1881:%.*]] = icmp slt i32 [[CONV1879]], [[CONV1880]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP1881]], label [[IF_THEN1883:%.*]], label [[IF_END1884:%.*]] +// SIMD-ONLY0: if.then1883: +// SIMD-ONLY0-NEXT: [[TMP777:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP777]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1884]] +// SIMD-ONLY0: if.end1884: +// SIMD-ONLY0-NEXT: [[TMP778:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1885:%.*]] = sext i16 [[TMP778]] to i32 +// SIMD-ONLY0-NEXT: [[TMP779:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1886:%.*]] = sext i16 [[TMP779]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1887:%.*]] = icmp sgt i32 [[CONV1885]], [[CONV1886]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1887]], label [[IF_THEN1889:%.*]], label [[IF_END1890:%.*]] +// SIMD-ONLY0: if.then1889: +// SIMD-ONLY0-NEXT: [[TMP780:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP780]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1890]] +// SIMD-ONLY0: if.end1890: +// SIMD-ONLY0-NEXT: [[TMP781:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1891:%.*]] = sext i16 [[TMP781]] to i32 +// SIMD-ONLY0-NEXT: [[TMP782:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1892:%.*]] = sext i16 [[TMP782]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1893:%.*]] = icmp slt i32 [[CONV1891]], [[CONV1892]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1893]], label [[IF_THEN1895:%.*]], label [[IF_END1896:%.*]] +// SIMD-ONLY0: if.then1895: +// SIMD-ONLY0-NEXT: [[TMP783:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP783]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1896]] +// SIMD-ONLY0: if.end1896: +// SIMD-ONLY0-NEXT: [[TMP784:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1897:%.*]] = sext i16 [[TMP784]] to i32 +// SIMD-ONLY0-NEXT: [[TMP785:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1898:%.*]] = sext i16 [[TMP785]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1899:%.*]] = icmp eq i32 
[[CONV1897]], [[CONV1898]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1899]], label [[COND_TRUE1901:%.*]], label [[COND_FALSE1903:%.*]] +// SIMD-ONLY0: cond.true1901: +// SIMD-ONLY0-NEXT: [[TMP786:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1902:%.*]] = sext i16 [[TMP786]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1905:%.*]] +// SIMD-ONLY0: cond.false1903: +// SIMD-ONLY0-NEXT: [[TMP787:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1904:%.*]] = sext i16 [[TMP787]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1905]] +// SIMD-ONLY0: cond.end1905: +// SIMD-ONLY0-NEXT: [[COND1906:%.*]] = phi i32 [ [[CONV1902]], [[COND_TRUE1901]] ], [ [[CONV1904]], [[COND_FALSE1903]] ] +// SIMD-ONLY0-NEXT: [[CONV1907:%.*]] = trunc i32 [[COND1906]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1907]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP788:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1908:%.*]] = sext i16 [[TMP788]] to i32 +// SIMD-ONLY0-NEXT: [[TMP789:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1909:%.*]] = sext i16 [[TMP789]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1910:%.*]] = icmp eq i32 [[CONV1908]], [[CONV1909]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1910]], label [[COND_TRUE1912:%.*]], label [[COND_FALSE1914:%.*]] +// SIMD-ONLY0: cond.true1912: +// SIMD-ONLY0-NEXT: [[TMP790:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1913:%.*]] = sext i16 [[TMP790]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1916:%.*]] +// SIMD-ONLY0: cond.false1914: +// SIMD-ONLY0-NEXT: [[TMP791:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1915:%.*]] = sext i16 [[TMP791]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1916]] +// SIMD-ONLY0: cond.end1916: +// SIMD-ONLY0-NEXT: [[COND1917:%.*]] = phi i32 [ [[CONV1913]], [[COND_TRUE1912]] ], [ [[CONV1915]], [[COND_FALSE1914]] ] +// SIMD-ONLY0-NEXT: [[CONV1918:%.*]] = trunc i32 [[COND1917]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1918]], ptr [[SX]], 
align 2 +// SIMD-ONLY0-NEXT: [[TMP792:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1919:%.*]] = sext i16 [[TMP792]] to i32 +// SIMD-ONLY0-NEXT: [[TMP793:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1920:%.*]] = sext i16 [[TMP793]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1921:%.*]] = icmp eq i32 [[CONV1919]], [[CONV1920]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1921]], label [[IF_THEN1923:%.*]], label [[IF_END1924:%.*]] +// SIMD-ONLY0: if.then1923: +// SIMD-ONLY0-NEXT: [[TMP794:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP794]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1924]] +// SIMD-ONLY0: if.end1924: +// SIMD-ONLY0-NEXT: [[TMP795:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1925:%.*]] = sext i16 [[TMP795]] to i32 +// SIMD-ONLY0-NEXT: [[TMP796:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1926:%.*]] = sext i16 [[TMP796]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1927:%.*]] = icmp eq i32 [[CONV1925]], [[CONV1926]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1927]], label [[IF_THEN1929:%.*]], label [[IF_END1930:%.*]] +// SIMD-ONLY0: if.then1929: +// SIMD-ONLY0-NEXT: [[TMP797:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP797]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1930]] +// SIMD-ONLY0: if.end1930: +// SIMD-ONLY0-NEXT: [[TMP798:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1931:%.*]] = zext i16 [[TMP798]] to i32 +// SIMD-ONLY0-NEXT: [[TMP799:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1932:%.*]] = zext i16 [[TMP799]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1933:%.*]] = icmp sgt i32 [[CONV1931]], [[CONV1932]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1933]], label [[COND_TRUE1935:%.*]], label [[COND_FALSE1937:%.*]] +// SIMD-ONLY0: cond.true1935: +// SIMD-ONLY0-NEXT: [[TMP800:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1936:%.*]] = zext i16 [[TMP800]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END1939:%.*]] +// SIMD-ONLY0: cond.false1937: +// SIMD-ONLY0-NEXT: [[TMP801:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1938:%.*]] = zext i16 [[TMP801]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1939]] +// SIMD-ONLY0: cond.end1939: +// SIMD-ONLY0-NEXT: [[COND1940:%.*]] = phi i32 [ [[CONV1936]], [[COND_TRUE1935]] ], [ [[CONV1938]], [[COND_FALSE1937]] ] +// SIMD-ONLY0-NEXT: [[CONV1941:%.*]] = trunc i32 [[COND1940]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1941]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP802:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1942:%.*]] = zext i16 [[TMP802]] to i32 +// SIMD-ONLY0-NEXT: [[TMP803:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1943:%.*]] = zext i16 [[TMP803]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1944:%.*]] = icmp slt i32 [[CONV1942]], [[CONV1943]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1944]], label [[COND_TRUE1946:%.*]], label [[COND_FALSE1948:%.*]] +// SIMD-ONLY0: cond.true1946: +// SIMD-ONLY0-NEXT: [[TMP804:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1947:%.*]] = zext i16 [[TMP804]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1950:%.*]] +// SIMD-ONLY0: cond.false1948: +// SIMD-ONLY0-NEXT: [[TMP805:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1949:%.*]] = zext i16 [[TMP805]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1950]] +// SIMD-ONLY0: cond.end1950: +// SIMD-ONLY0-NEXT: [[COND1951:%.*]] = phi i32 [ [[CONV1947]], [[COND_TRUE1946]] ], [ [[CONV1949]], [[COND_FALSE1948]] ] +// SIMD-ONLY0-NEXT: [[CONV1952:%.*]] = trunc i32 [[COND1951]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1952]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP806:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1953:%.*]] = zext i16 [[TMP806]] to i32 +// SIMD-ONLY0-NEXT: [[TMP807:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1954:%.*]] = zext i16 [[TMP807]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1955:%.*]] = icmp sgt i32 [[CONV1953]], [[CONV1954]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1955]], label [[COND_TRUE1957:%.*]], label [[COND_FALSE1959:%.*]] +// SIMD-ONLY0: cond.true1957: +// SIMD-ONLY0-NEXT: [[TMP808:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1958:%.*]] = zext i16 [[TMP808]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1961:%.*]] +// SIMD-ONLY0: cond.false1959: +// SIMD-ONLY0-NEXT: [[TMP809:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1960:%.*]] = zext i16 [[TMP809]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1961]] +// SIMD-ONLY0: cond.end1961: +// SIMD-ONLY0-NEXT: [[COND1962:%.*]] = phi i32 [ [[CONV1958]], [[COND_TRUE1957]] ], [ [[CONV1960]], [[COND_FALSE1959]] ] +// SIMD-ONLY0-NEXT: [[CONV1963:%.*]] = trunc i32 [[COND1962]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1963]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP810:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1964:%.*]] = zext i16 [[TMP810]] to i32 +// SIMD-ONLY0-NEXT: [[TMP811:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1965:%.*]] = zext i16 [[TMP811]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1966:%.*]] = icmp slt i32 [[CONV1964]], [[CONV1965]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1966]], label [[COND_TRUE1968:%.*]], label [[COND_FALSE1970:%.*]] +// SIMD-ONLY0: cond.true1968: +// SIMD-ONLY0-NEXT: [[TMP812:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1969:%.*]] = zext i16 [[TMP812]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1972:%.*]] +// SIMD-ONLY0: cond.false1970: +// SIMD-ONLY0-NEXT: [[TMP813:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1971:%.*]] = zext i16 [[TMP813]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END1972]] +// SIMD-ONLY0: cond.end1972: +// SIMD-ONLY0-NEXT: [[COND1973:%.*]] = phi i32 [ [[CONV1969]], [[COND_TRUE1968]] ], [ [[CONV1971]], [[COND_FALSE1970]] ] +// SIMD-ONLY0-NEXT: [[CONV1974:%.*]] = trunc i32 [[COND1973]] to i16 +// 
SIMD-ONLY0-NEXT: store i16 [[CONV1974]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP814:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1975:%.*]] = zext i16 [[TMP814]] to i32 +// SIMD-ONLY0-NEXT: [[TMP815:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1976:%.*]] = zext i16 [[TMP815]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1977:%.*]] = icmp sgt i32 [[CONV1975]], [[CONV1976]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1977]], label [[IF_THEN1979:%.*]], label [[IF_END1980:%.*]] +// SIMD-ONLY0: if.then1979: +// SIMD-ONLY0-NEXT: [[TMP816:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP816]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1980]] +// SIMD-ONLY0: if.end1980: +// SIMD-ONLY0-NEXT: [[TMP817:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1981:%.*]] = zext i16 [[TMP817]] to i32 +// SIMD-ONLY0-NEXT: [[TMP818:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1982:%.*]] = zext i16 [[TMP818]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1983:%.*]] = icmp slt i32 [[CONV1981]], [[CONV1982]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1983]], label [[IF_THEN1985:%.*]], label [[IF_END1986:%.*]] +// SIMD-ONLY0: if.then1985: +// SIMD-ONLY0-NEXT: [[TMP819:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP819]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1986]] +// SIMD-ONLY0: if.end1986: +// SIMD-ONLY0-NEXT: [[TMP820:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1987:%.*]] = zext i16 [[TMP820]] to i32 +// SIMD-ONLY0-NEXT: [[TMP821:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1988:%.*]] = zext i16 [[TMP821]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1989:%.*]] = icmp sgt i32 [[CONV1987]], [[CONV1988]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1989]], label [[IF_THEN1991:%.*]], label [[IF_END1992:%.*]] +// SIMD-ONLY0: if.then1991: +// SIMD-ONLY0-NEXT: [[TMP822:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP822]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1992]] +// SIMD-ONLY0: if.end1992: +// SIMD-ONLY0-NEXT: [[TMP823:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1993:%.*]] = zext i16 [[TMP823]] to i32 +// SIMD-ONLY0-NEXT: [[TMP824:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1994:%.*]] = zext i16 [[TMP824]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1995:%.*]] = icmp slt i32 [[CONV1993]], [[CONV1994]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1995]], label [[IF_THEN1997:%.*]], label [[IF_END1998:%.*]] +// SIMD-ONLY0: if.then1997: +// SIMD-ONLY0-NEXT: [[TMP825:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP825]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1998]] +// SIMD-ONLY0: if.end1998: +// SIMD-ONLY0-NEXT: [[TMP826:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1999:%.*]] = zext i16 [[TMP826]] to i32 +// SIMD-ONLY0-NEXT: [[TMP827:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2000:%.*]] = zext i16 [[TMP827]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2001:%.*]] = icmp eq i32 [[CONV1999]], [[CONV2000]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2001]], label [[COND_TRUE2003:%.*]], label [[COND_FALSE2005:%.*]] +// SIMD-ONLY0: cond.true2003: +// SIMD-ONLY0-NEXT: [[TMP828:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2004:%.*]] = zext i16 [[TMP828]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2007:%.*]] +// SIMD-ONLY0: cond.false2005: +// SIMD-ONLY0-NEXT: [[TMP829:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2006:%.*]] = zext i16 [[TMP829]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2007]] +// SIMD-ONLY0: cond.end2007: +// SIMD-ONLY0-NEXT: [[COND2008:%.*]] = phi i32 [ [[CONV2004]], [[COND_TRUE2003]] ], [ [[CONV2006]], [[COND_FALSE2005]] ] +// SIMD-ONLY0-NEXT: [[CONV2009:%.*]] = trunc i32 [[COND2008]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2009]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP830:%.*]] = load 
i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2010:%.*]] = zext i16 [[TMP830]] to i32 +// SIMD-ONLY0-NEXT: [[TMP831:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2011:%.*]] = zext i16 [[TMP831]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2012:%.*]] = icmp eq i32 [[CONV2010]], [[CONV2011]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2012]], label [[COND_TRUE2014:%.*]], label [[COND_FALSE2016:%.*]] +// SIMD-ONLY0: cond.true2014: +// SIMD-ONLY0-NEXT: [[TMP832:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2015:%.*]] = zext i16 [[TMP832]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2018:%.*]] +// SIMD-ONLY0: cond.false2016: +// SIMD-ONLY0-NEXT: [[TMP833:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2017:%.*]] = zext i16 [[TMP833]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2018]] +// SIMD-ONLY0: cond.end2018: +// SIMD-ONLY0-NEXT: [[COND2019:%.*]] = phi i32 [ [[CONV2015]], [[COND_TRUE2014]] ], [ [[CONV2017]], [[COND_FALSE2016]] ] +// SIMD-ONLY0-NEXT: [[CONV2020:%.*]] = trunc i32 [[COND2019]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2020]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP834:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2021:%.*]] = zext i16 [[TMP834]] to i32 +// SIMD-ONLY0-NEXT: [[TMP835:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2022:%.*]] = zext i16 [[TMP835]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2023:%.*]] = icmp eq i32 [[CONV2021]], [[CONV2022]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2023]], label [[IF_THEN2025:%.*]], label [[IF_END2026:%.*]] +// SIMD-ONLY0: if.then2025: +// SIMD-ONLY0-NEXT: [[TMP836:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP836]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2026]] +// SIMD-ONLY0: if.end2026: +// SIMD-ONLY0-NEXT: [[TMP837:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2027:%.*]] = zext i16 [[TMP837]] to i32 +// SIMD-ONLY0-NEXT: [[TMP838:%.*]] = load i16, ptr [[USX]], 
align 2 +// SIMD-ONLY0-NEXT: [[CONV2028:%.*]] = zext i16 [[TMP838]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2029:%.*]] = icmp eq i32 [[CONV2027]], [[CONV2028]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2029]], label [[IF_THEN2031:%.*]], label [[IF_END2032:%.*]] +// SIMD-ONLY0: if.then2031: +// SIMD-ONLY0-NEXT: [[TMP839:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP839]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2032]] +// SIMD-ONLY0: if.end2032: +// SIMD-ONLY0-NEXT: [[TMP840:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2033:%.*]] = sext i16 [[TMP840]] to i32 +// SIMD-ONLY0-NEXT: [[TMP841:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2034:%.*]] = sext i16 [[TMP841]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2035:%.*]] = icmp sgt i32 [[CONV2033]], [[CONV2034]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2035]], label [[COND_TRUE2037:%.*]], label [[COND_FALSE2039:%.*]] +// SIMD-ONLY0: cond.true2037: +// SIMD-ONLY0-NEXT: [[TMP842:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2038:%.*]] = sext i16 [[TMP842]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2041:%.*]] +// SIMD-ONLY0: cond.false2039: +// SIMD-ONLY0-NEXT: [[TMP843:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2040:%.*]] = sext i16 [[TMP843]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2041]] +// SIMD-ONLY0: cond.end2041: +// SIMD-ONLY0-NEXT: [[COND2042:%.*]] = phi i32 [ [[CONV2038]], [[COND_TRUE2037]] ], [ [[CONV2040]], [[COND_FALSE2039]] ] +// SIMD-ONLY0-NEXT: [[CONV2043:%.*]] = trunc i32 [[COND2042]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2043]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP844:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2044:%.*]] = sext i16 [[TMP844]] to i32 +// SIMD-ONLY0-NEXT: [[TMP845:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2045:%.*]] = sext i16 [[TMP845]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2046:%.*]] = icmp slt i32 [[CONV2044]], [[CONV2045]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP2046]], label [[COND_TRUE2048:%.*]], label [[COND_FALSE2050:%.*]] +// SIMD-ONLY0: cond.true2048: +// SIMD-ONLY0-NEXT: [[TMP846:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2049:%.*]] = sext i16 [[TMP846]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2052:%.*]] +// SIMD-ONLY0: cond.false2050: +// SIMD-ONLY0-NEXT: [[TMP847:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2051:%.*]] = sext i16 [[TMP847]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2052]] +// SIMD-ONLY0: cond.end2052: +// SIMD-ONLY0-NEXT: [[COND2053:%.*]] = phi i32 [ [[CONV2049]], [[COND_TRUE2048]] ], [ [[CONV2051]], [[COND_FALSE2050]] ] +// SIMD-ONLY0-NEXT: [[CONV2054:%.*]] = trunc i32 [[COND2053]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2054]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP848:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2055:%.*]] = sext i16 [[TMP848]] to i32 +// SIMD-ONLY0-NEXT: [[TMP849:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2056:%.*]] = sext i16 [[TMP849]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2057:%.*]] = icmp sgt i32 [[CONV2055]], [[CONV2056]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2057]], label [[COND_TRUE2059:%.*]], label [[COND_FALSE2061:%.*]] +// SIMD-ONLY0: cond.true2059: +// SIMD-ONLY0-NEXT: [[TMP850:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2060:%.*]] = sext i16 [[TMP850]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2063:%.*]] +// SIMD-ONLY0: cond.false2061: +// SIMD-ONLY0-NEXT: [[TMP851:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2062:%.*]] = sext i16 [[TMP851]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2063]] +// SIMD-ONLY0: cond.end2063: +// SIMD-ONLY0-NEXT: [[COND2064:%.*]] = phi i32 [ [[CONV2060]], [[COND_TRUE2059]] ], [ [[CONV2062]], [[COND_FALSE2061]] ] +// SIMD-ONLY0-NEXT: [[CONV2065:%.*]] = trunc i32 [[COND2064]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2065]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: 
[[TMP852:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2066:%.*]] = sext i16 [[TMP852]] to i32 +// SIMD-ONLY0-NEXT: [[TMP853:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2067:%.*]] = sext i16 [[TMP853]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2068:%.*]] = icmp slt i32 [[CONV2066]], [[CONV2067]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2068]], label [[COND_TRUE2070:%.*]], label [[COND_FALSE2072:%.*]] +// SIMD-ONLY0: cond.true2070: +// SIMD-ONLY0-NEXT: [[TMP854:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2071:%.*]] = sext i16 [[TMP854]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2074:%.*]] +// SIMD-ONLY0: cond.false2072: +// SIMD-ONLY0-NEXT: [[TMP855:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2073:%.*]] = sext i16 [[TMP855]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2074]] +// SIMD-ONLY0: cond.end2074: +// SIMD-ONLY0-NEXT: [[COND2075:%.*]] = phi i32 [ [[CONV2071]], [[COND_TRUE2070]] ], [ [[CONV2073]], [[COND_FALSE2072]] ] +// SIMD-ONLY0-NEXT: [[CONV2076:%.*]] = trunc i32 [[COND2075]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2076]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP856:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2077:%.*]] = sext i16 [[TMP856]] to i32 +// SIMD-ONLY0-NEXT: [[TMP857:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2078:%.*]] = sext i16 [[TMP857]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2079:%.*]] = icmp sgt i32 [[CONV2077]], [[CONV2078]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2079]], label [[IF_THEN2081:%.*]], label [[IF_END2082:%.*]] +// SIMD-ONLY0: if.then2081: +// SIMD-ONLY0-NEXT: [[TMP858:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP858]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2082]] +// SIMD-ONLY0: if.end2082: +// SIMD-ONLY0-NEXT: [[TMP859:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2083:%.*]] = sext i16 [[TMP859]] to i32 +// SIMD-ONLY0-NEXT: [[TMP860:%.*]] = load 
i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2084:%.*]] = sext i16 [[TMP860]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2085:%.*]] = icmp slt i32 [[CONV2083]], [[CONV2084]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2085]], label [[IF_THEN2087:%.*]], label [[IF_END2088:%.*]] +// SIMD-ONLY0: if.then2087: +// SIMD-ONLY0-NEXT: [[TMP861:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP861]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2088]] +// SIMD-ONLY0: if.end2088: +// SIMD-ONLY0-NEXT: [[TMP862:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2089:%.*]] = sext i16 [[TMP862]] to i32 +// SIMD-ONLY0-NEXT: [[TMP863:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2090:%.*]] = sext i16 [[TMP863]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2091:%.*]] = icmp sgt i32 [[CONV2089]], [[CONV2090]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2091]], label [[IF_THEN2093:%.*]], label [[IF_END2094:%.*]] +// SIMD-ONLY0: if.then2093: +// SIMD-ONLY0-NEXT: [[TMP864:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP864]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2094]] +// SIMD-ONLY0: if.end2094: +// SIMD-ONLY0-NEXT: [[TMP865:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2095:%.*]] = sext i16 [[TMP865]] to i32 +// SIMD-ONLY0-NEXT: [[TMP866:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2096:%.*]] = sext i16 [[TMP866]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2097:%.*]] = icmp slt i32 [[CONV2095]], [[CONV2096]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2097]], label [[IF_THEN2099:%.*]], label [[IF_END2100:%.*]] +// SIMD-ONLY0: if.then2099: +// SIMD-ONLY0-NEXT: [[TMP867:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP867]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2100]] +// SIMD-ONLY0: if.end2100: +// SIMD-ONLY0-NEXT: [[TMP868:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2101:%.*]] = sext i16 [[TMP868]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP869:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2102:%.*]] = sext i16 [[TMP869]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2103:%.*]] = icmp eq i32 [[CONV2101]], [[CONV2102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2103]], label [[COND_TRUE2105:%.*]], label [[COND_FALSE2107:%.*]] +// SIMD-ONLY0: cond.true2105: +// SIMD-ONLY0-NEXT: [[TMP870:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2106:%.*]] = sext i16 [[TMP870]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2109:%.*]] +// SIMD-ONLY0: cond.false2107: +// SIMD-ONLY0-NEXT: [[TMP871:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2108:%.*]] = sext i16 [[TMP871]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2109]] +// SIMD-ONLY0: cond.end2109: +// SIMD-ONLY0-NEXT: [[COND2110:%.*]] = phi i32 [ [[CONV2106]], [[COND_TRUE2105]] ], [ [[CONV2108]], [[COND_FALSE2107]] ] +// SIMD-ONLY0-NEXT: [[CONV2111:%.*]] = trunc i32 [[COND2110]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2111]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP872:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2112:%.*]] = sext i16 [[TMP872]] to i32 +// SIMD-ONLY0-NEXT: [[TMP873:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2113:%.*]] = sext i16 [[TMP873]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2114:%.*]] = icmp eq i32 [[CONV2112]], [[CONV2113]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2114]], label [[COND_TRUE2116:%.*]], label [[COND_FALSE2118:%.*]] +// SIMD-ONLY0: cond.true2116: +// SIMD-ONLY0-NEXT: [[TMP874:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2117:%.*]] = sext i16 [[TMP874]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2120:%.*]] +// SIMD-ONLY0: cond.false2118: +// SIMD-ONLY0-NEXT: [[TMP875:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2119:%.*]] = sext i16 [[TMP875]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2120]] +// SIMD-ONLY0: cond.end2120: +// SIMD-ONLY0-NEXT: [[COND2121:%.*]] = phi i32 [ [[CONV2117]], [[COND_TRUE2116]] ], 
[ [[CONV2119]], [[COND_FALSE2118]] ] +// SIMD-ONLY0-NEXT: [[CONV2122:%.*]] = trunc i32 [[COND2121]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2122]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP876:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2123:%.*]] = sext i16 [[TMP876]] to i32 +// SIMD-ONLY0-NEXT: [[TMP877:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2124:%.*]] = sext i16 [[TMP877]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2125:%.*]] = icmp eq i32 [[CONV2123]], [[CONV2124]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2125]], label [[IF_THEN2127:%.*]], label [[IF_END2128:%.*]] +// SIMD-ONLY0: if.then2127: +// SIMD-ONLY0-NEXT: [[TMP878:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP878]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2128]] +// SIMD-ONLY0: if.end2128: +// SIMD-ONLY0-NEXT: [[TMP879:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2129:%.*]] = sext i16 [[TMP879]] to i32 +// SIMD-ONLY0-NEXT: [[TMP880:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2130:%.*]] = sext i16 [[TMP880]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2131:%.*]] = icmp eq i32 [[CONV2129]], [[CONV2130]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2131]], label [[IF_THEN2133:%.*]], label [[IF_END2134:%.*]] +// SIMD-ONLY0: if.then2133: +// SIMD-ONLY0-NEXT: [[TMP881:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP881]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2134]] +// SIMD-ONLY0: if.end2134: +// SIMD-ONLY0-NEXT: [[TMP882:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2135:%.*]] = zext i16 [[TMP882]] to i32 +// SIMD-ONLY0-NEXT: [[TMP883:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2136:%.*]] = zext i16 [[TMP883]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2137:%.*]] = icmp sgt i32 [[CONV2135]], [[CONV2136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2137]], label [[COND_TRUE2139:%.*]], label [[COND_FALSE2141:%.*]] +// SIMD-ONLY0: cond.true2139: 
+// SIMD-ONLY0-NEXT: [[TMP884:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2140:%.*]] = zext i16 [[TMP884]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2143:%.*]] +// SIMD-ONLY0: cond.false2141: +// SIMD-ONLY0-NEXT: [[TMP885:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2142:%.*]] = zext i16 [[TMP885]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2143]] +// SIMD-ONLY0: cond.end2143: +// SIMD-ONLY0-NEXT: [[COND2144:%.*]] = phi i32 [ [[CONV2140]], [[COND_TRUE2139]] ], [ [[CONV2142]], [[COND_FALSE2141]] ] +// SIMD-ONLY0-NEXT: [[CONV2145:%.*]] = trunc i32 [[COND2144]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2145]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP886:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2146:%.*]] = zext i16 [[TMP886]] to i32 +// SIMD-ONLY0-NEXT: [[TMP887:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2147:%.*]] = zext i16 [[TMP887]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2148:%.*]] = icmp slt i32 [[CONV2146]], [[CONV2147]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2148]], label [[COND_TRUE2150:%.*]], label [[COND_FALSE2152:%.*]] +// SIMD-ONLY0: cond.true2150: +// SIMD-ONLY0-NEXT: [[TMP888:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2151:%.*]] = zext i16 [[TMP888]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2154:%.*]] +// SIMD-ONLY0: cond.false2152: +// SIMD-ONLY0-NEXT: [[TMP889:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2153:%.*]] = zext i16 [[TMP889]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2154]] +// SIMD-ONLY0: cond.end2154: +// SIMD-ONLY0-NEXT: [[COND2155:%.*]] = phi i32 [ [[CONV2151]], [[COND_TRUE2150]] ], [ [[CONV2153]], [[COND_FALSE2152]] ] +// SIMD-ONLY0-NEXT: [[CONV2156:%.*]] = trunc i32 [[COND2155]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2156]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP890:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2157:%.*]] = zext i16 [[TMP890]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP891:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2158:%.*]] = zext i16 [[TMP891]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2159:%.*]] = icmp sgt i32 [[CONV2157]], [[CONV2158]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2159]], label [[COND_TRUE2161:%.*]], label [[COND_FALSE2163:%.*]] +// SIMD-ONLY0: cond.true2161: +// SIMD-ONLY0-NEXT: [[TMP892:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2162:%.*]] = zext i16 [[TMP892]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2165:%.*]] +// SIMD-ONLY0: cond.false2163: +// SIMD-ONLY0-NEXT: [[TMP893:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2164:%.*]] = zext i16 [[TMP893]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2165]] +// SIMD-ONLY0: cond.end2165: +// SIMD-ONLY0-NEXT: [[COND2166:%.*]] = phi i32 [ [[CONV2162]], [[COND_TRUE2161]] ], [ [[CONV2164]], [[COND_FALSE2163]] ] +// SIMD-ONLY0-NEXT: [[CONV2167:%.*]] = trunc i32 [[COND2166]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2167]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP894:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2168:%.*]] = zext i16 [[TMP894]] to i32 +// SIMD-ONLY0-NEXT: [[TMP895:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2169:%.*]] = zext i16 [[TMP895]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2170:%.*]] = icmp slt i32 [[CONV2168]], [[CONV2169]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2170]], label [[COND_TRUE2172:%.*]], label [[COND_FALSE2174:%.*]] +// SIMD-ONLY0: cond.true2172: +// SIMD-ONLY0-NEXT: [[TMP896:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2173:%.*]] = zext i16 [[TMP896]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2176:%.*]] +// SIMD-ONLY0: cond.false2174: +// SIMD-ONLY0-NEXT: [[TMP897:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2175:%.*]] = zext i16 [[TMP897]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2176]] +// SIMD-ONLY0: cond.end2176: +// SIMD-ONLY0-NEXT: [[COND2177:%.*]] = phi i32 [ 
[[CONV2173]], [[COND_TRUE2172]] ], [ [[CONV2175]], [[COND_FALSE2174]] ] +// SIMD-ONLY0-NEXT: [[CONV2178:%.*]] = trunc i32 [[COND2177]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2178]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP898:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2179:%.*]] = zext i16 [[TMP898]] to i32 +// SIMD-ONLY0-NEXT: [[TMP899:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2180:%.*]] = zext i16 [[TMP899]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2181:%.*]] = icmp sgt i32 [[CONV2179]], [[CONV2180]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2181]], label [[IF_THEN2183:%.*]], label [[IF_END2184:%.*]] +// SIMD-ONLY0: if.then2183: +// SIMD-ONLY0-NEXT: [[TMP900:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP900]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2184]] +// SIMD-ONLY0: if.end2184: +// SIMD-ONLY0-NEXT: [[TMP901:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2185:%.*]] = zext i16 [[TMP901]] to i32 +// SIMD-ONLY0-NEXT: [[TMP902:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2186:%.*]] = zext i16 [[TMP902]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2187:%.*]] = icmp slt i32 [[CONV2185]], [[CONV2186]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2187]], label [[IF_THEN2189:%.*]], label [[IF_END2190:%.*]] +// SIMD-ONLY0: if.then2189: +// SIMD-ONLY0-NEXT: [[TMP903:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP903]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2190]] +// SIMD-ONLY0: if.end2190: +// SIMD-ONLY0-NEXT: [[TMP904:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2191:%.*]] = zext i16 [[TMP904]] to i32 +// SIMD-ONLY0-NEXT: [[TMP905:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2192:%.*]] = zext i16 [[TMP905]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2193:%.*]] = icmp sgt i32 [[CONV2191]], [[CONV2192]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2193]], label [[IF_THEN2195:%.*]], label 
[[IF_END2196:%.*]] +// SIMD-ONLY0: if.then2195: +// SIMD-ONLY0-NEXT: [[TMP906:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP906]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2196]] +// SIMD-ONLY0: if.end2196: +// SIMD-ONLY0-NEXT: [[TMP907:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2197:%.*]] = zext i16 [[TMP907]] to i32 +// SIMD-ONLY0-NEXT: [[TMP908:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2198:%.*]] = zext i16 [[TMP908]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2199:%.*]] = icmp slt i32 [[CONV2197]], [[CONV2198]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2199]], label [[IF_THEN2201:%.*]], label [[IF_END2202:%.*]] +// SIMD-ONLY0: if.then2201: +// SIMD-ONLY0-NEXT: [[TMP909:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP909]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2202]] +// SIMD-ONLY0: if.end2202: +// SIMD-ONLY0-NEXT: [[TMP910:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2203:%.*]] = zext i16 [[TMP910]] to i32 +// SIMD-ONLY0-NEXT: [[TMP911:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2204:%.*]] = zext i16 [[TMP911]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2205:%.*]] = icmp eq i32 [[CONV2203]], [[CONV2204]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2205]], label [[COND_TRUE2207:%.*]], label [[COND_FALSE2209:%.*]] +// SIMD-ONLY0: cond.true2207: +// SIMD-ONLY0-NEXT: [[TMP912:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2208:%.*]] = zext i16 [[TMP912]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2211:%.*]] +// SIMD-ONLY0: cond.false2209: +// SIMD-ONLY0-NEXT: [[TMP913:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2210:%.*]] = zext i16 [[TMP913]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2211]] +// SIMD-ONLY0: cond.end2211: +// SIMD-ONLY0-NEXT: [[COND2212:%.*]] = phi i32 [ [[CONV2208]], [[COND_TRUE2207]] ], [ [[CONV2210]], [[COND_FALSE2209]] ] +// SIMD-ONLY0-NEXT: 
[[CONV2213:%.*]] = trunc i32 [[COND2212]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2213]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP914:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2214:%.*]] = zext i16 [[TMP914]] to i32 +// SIMD-ONLY0-NEXT: [[TMP915:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2215:%.*]] = zext i16 [[TMP915]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2216:%.*]] = icmp eq i32 [[CONV2214]], [[CONV2215]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2216]], label [[COND_TRUE2218:%.*]], label [[COND_FALSE2220:%.*]] +// SIMD-ONLY0: cond.true2218: +// SIMD-ONLY0-NEXT: [[TMP916:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2219:%.*]] = zext i16 [[TMP916]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2222:%.*]] +// SIMD-ONLY0: cond.false2220: +// SIMD-ONLY0-NEXT: [[TMP917:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2221:%.*]] = zext i16 [[TMP917]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2222]] +// SIMD-ONLY0: cond.end2222: +// SIMD-ONLY0-NEXT: [[COND2223:%.*]] = phi i32 [ [[CONV2219]], [[COND_TRUE2218]] ], [ [[CONV2221]], [[COND_FALSE2220]] ] +// SIMD-ONLY0-NEXT: [[CONV2224:%.*]] = trunc i32 [[COND2223]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2224]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP918:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2225:%.*]] = zext i16 [[TMP918]] to i32 +// SIMD-ONLY0-NEXT: [[TMP919:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2226:%.*]] = zext i16 [[TMP919]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2227:%.*]] = icmp eq i32 [[CONV2225]], [[CONV2226]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2227]], label [[IF_THEN2229:%.*]], label [[IF_END2230:%.*]] +// SIMD-ONLY0: if.then2229: +// SIMD-ONLY0-NEXT: [[TMP920:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP920]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2230]] +// SIMD-ONLY0: if.end2230: +// SIMD-ONLY0-NEXT: [[TMP921:%.*]] = 
load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2231:%.*]] = zext i16 [[TMP921]] to i32 +// SIMD-ONLY0-NEXT: [[TMP922:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2232:%.*]] = zext i16 [[TMP922]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2233:%.*]] = icmp eq i32 [[CONV2231]], [[CONV2232]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2233]], label [[IF_THEN2235:%.*]], label [[IF_END2236:%.*]] +// SIMD-ONLY0: if.then2235: +// SIMD-ONLY0-NEXT: [[TMP923:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP923]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2236]] +// SIMD-ONLY0: if.end2236: +// SIMD-ONLY0-NEXT: [[TMP924:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2237:%.*]] = sext i16 [[TMP924]] to i32 +// SIMD-ONLY0-NEXT: [[TMP925:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2238:%.*]] = sext i16 [[TMP925]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2239:%.*]] = icmp sgt i32 [[CONV2237]], [[CONV2238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2239]], label [[COND_TRUE2241:%.*]], label [[COND_FALSE2243:%.*]] +// SIMD-ONLY0: cond.true2241: +// SIMD-ONLY0-NEXT: [[TMP926:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2242:%.*]] = sext i16 [[TMP926]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2245:%.*]] +// SIMD-ONLY0: cond.false2243: +// SIMD-ONLY0-NEXT: [[TMP927:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2244:%.*]] = sext i16 [[TMP927]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2245]] +// SIMD-ONLY0: cond.end2245: +// SIMD-ONLY0-NEXT: [[COND2246:%.*]] = phi i32 [ [[CONV2242]], [[COND_TRUE2241]] ], [ [[CONV2244]], [[COND_FALSE2243]] ] +// SIMD-ONLY0-NEXT: [[CONV2247:%.*]] = trunc i32 [[COND2246]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2247]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP928:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2248:%.*]] = sext i16 [[TMP928]] to i32 +// SIMD-ONLY0-NEXT: [[TMP929:%.*]] = load i16, ptr [[SE]], 
align 2 +// SIMD-ONLY0-NEXT: [[CONV2249:%.*]] = sext i16 [[TMP929]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2250:%.*]] = icmp slt i32 [[CONV2248]], [[CONV2249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2250]], label [[COND_TRUE2252:%.*]], label [[COND_FALSE2254:%.*]] +// SIMD-ONLY0: cond.true2252: +// SIMD-ONLY0-NEXT: [[TMP930:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2253:%.*]] = sext i16 [[TMP930]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2256:%.*]] +// SIMD-ONLY0: cond.false2254: +// SIMD-ONLY0-NEXT: [[TMP931:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2255:%.*]] = sext i16 [[TMP931]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2256]] +// SIMD-ONLY0: cond.end2256: +// SIMD-ONLY0-NEXT: [[COND2257:%.*]] = phi i32 [ [[CONV2253]], [[COND_TRUE2252]] ], [ [[CONV2255]], [[COND_FALSE2254]] ] +// SIMD-ONLY0-NEXT: [[CONV2258:%.*]] = trunc i32 [[COND2257]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2258]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP932:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2259:%.*]] = sext i16 [[TMP932]] to i32 +// SIMD-ONLY0-NEXT: [[TMP933:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2260:%.*]] = sext i16 [[TMP933]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2261:%.*]] = icmp sgt i32 [[CONV2259]], [[CONV2260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2261]], label [[COND_TRUE2263:%.*]], label [[COND_FALSE2265:%.*]] +// SIMD-ONLY0: cond.true2263: +// SIMD-ONLY0-NEXT: [[TMP934:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2264:%.*]] = sext i16 [[TMP934]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2267:%.*]] +// SIMD-ONLY0: cond.false2265: +// SIMD-ONLY0-NEXT: [[TMP935:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2266:%.*]] = sext i16 [[TMP935]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2267]] +// SIMD-ONLY0: cond.end2267: +// SIMD-ONLY0-NEXT: [[COND2268:%.*]] = phi i32 [ [[CONV2264]], [[COND_TRUE2263]] ], [ [[CONV2266]], [[COND_FALSE2265]] ] 
+// SIMD-ONLY0-NEXT: [[CONV2269:%.*]] = trunc i32 [[COND2268]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2269]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP936:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2270:%.*]] = sext i16 [[TMP936]] to i32 +// SIMD-ONLY0-NEXT: [[TMP937:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2271:%.*]] = sext i16 [[TMP937]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2272:%.*]] = icmp slt i32 [[CONV2270]], [[CONV2271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2272]], label [[COND_TRUE2274:%.*]], label [[COND_FALSE2276:%.*]] +// SIMD-ONLY0: cond.true2274: +// SIMD-ONLY0-NEXT: [[TMP938:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2275:%.*]] = sext i16 [[TMP938]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2278:%.*]] +// SIMD-ONLY0: cond.false2276: +// SIMD-ONLY0-NEXT: [[TMP939:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2277:%.*]] = sext i16 [[TMP939]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2278]] +// SIMD-ONLY0: cond.end2278: +// SIMD-ONLY0-NEXT: [[COND2279:%.*]] = phi i32 [ [[CONV2275]], [[COND_TRUE2274]] ], [ [[CONV2277]], [[COND_FALSE2276]] ] +// SIMD-ONLY0-NEXT: [[CONV2280:%.*]] = trunc i32 [[COND2279]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2280]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP940:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2281:%.*]] = sext i16 [[TMP940]] to i32 +// SIMD-ONLY0-NEXT: [[TMP941:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2282:%.*]] = sext i16 [[TMP941]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2283:%.*]] = icmp sgt i32 [[CONV2281]], [[CONV2282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2283]], label [[IF_THEN2285:%.*]], label [[IF_END2286:%.*]] +// SIMD-ONLY0: if.then2285: +// SIMD-ONLY0-NEXT: [[TMP942:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP942]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2286]] +// SIMD-ONLY0: if.end2286: +// SIMD-ONLY0-NEXT: 
[[TMP943:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2287:%.*]] = sext i16 [[TMP943]] to i32 +// SIMD-ONLY0-NEXT: [[TMP944:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2288:%.*]] = sext i16 [[TMP944]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2289:%.*]] = icmp slt i32 [[CONV2287]], [[CONV2288]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2289]], label [[IF_THEN2291:%.*]], label [[IF_END2292:%.*]] +// SIMD-ONLY0: if.then2291: +// SIMD-ONLY0-NEXT: [[TMP945:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP945]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2292]] +// SIMD-ONLY0: if.end2292: +// SIMD-ONLY0-NEXT: [[TMP946:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2293:%.*]] = sext i16 [[TMP946]] to i32 +// SIMD-ONLY0-NEXT: [[TMP947:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2294:%.*]] = sext i16 [[TMP947]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2295:%.*]] = icmp sgt i32 [[CONV2293]], [[CONV2294]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2295]], label [[IF_THEN2297:%.*]], label [[IF_END2298:%.*]] +// SIMD-ONLY0: if.then2297: +// SIMD-ONLY0-NEXT: [[TMP948:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP948]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2298]] +// SIMD-ONLY0: if.end2298: +// SIMD-ONLY0-NEXT: [[TMP949:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2299:%.*]] = sext i16 [[TMP949]] to i32 +// SIMD-ONLY0-NEXT: [[TMP950:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2300:%.*]] = sext i16 [[TMP950]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2301:%.*]] = icmp slt i32 [[CONV2299]], [[CONV2300]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2301]], label [[IF_THEN2303:%.*]], label [[IF_END2304:%.*]] +// SIMD-ONLY0: if.then2303: +// SIMD-ONLY0-NEXT: [[TMP951:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP951]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2304]] +// SIMD-ONLY0: 
if.end2304: +// SIMD-ONLY0-NEXT: [[TMP952:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2305:%.*]] = sext i16 [[TMP952]] to i32 +// SIMD-ONLY0-NEXT: [[TMP953:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2306:%.*]] = sext i16 [[TMP953]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2307:%.*]] = icmp eq i32 [[CONV2305]], [[CONV2306]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2307]], label [[COND_TRUE2309:%.*]], label [[COND_FALSE2311:%.*]] +// SIMD-ONLY0: cond.true2309: +// SIMD-ONLY0-NEXT: [[TMP954:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2310:%.*]] = sext i16 [[TMP954]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2313:%.*]] +// SIMD-ONLY0: cond.false2311: +// SIMD-ONLY0-NEXT: [[TMP955:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2312:%.*]] = sext i16 [[TMP955]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2313]] +// SIMD-ONLY0: cond.end2313: +// SIMD-ONLY0-NEXT: [[COND2314:%.*]] = phi i32 [ [[CONV2310]], [[COND_TRUE2309]] ], [ [[CONV2312]], [[COND_FALSE2311]] ] +// SIMD-ONLY0-NEXT: [[CONV2315:%.*]] = trunc i32 [[COND2314]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2315]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP956:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2316:%.*]] = sext i16 [[TMP956]] to i32 +// SIMD-ONLY0-NEXT: [[TMP957:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2317:%.*]] = sext i16 [[TMP957]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2318:%.*]] = icmp eq i32 [[CONV2316]], [[CONV2317]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2318]], label [[COND_TRUE2320:%.*]], label [[COND_FALSE2322:%.*]] +// SIMD-ONLY0: cond.true2320: +// SIMD-ONLY0-NEXT: [[TMP958:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2321:%.*]] = sext i16 [[TMP958]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2324:%.*]] +// SIMD-ONLY0: cond.false2322: +// SIMD-ONLY0-NEXT: [[TMP959:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2323:%.*]] = sext i16 [[TMP959]] 
to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2324]] +// SIMD-ONLY0: cond.end2324: +// SIMD-ONLY0-NEXT: [[COND2325:%.*]] = phi i32 [ [[CONV2321]], [[COND_TRUE2320]] ], [ [[CONV2323]], [[COND_FALSE2322]] ] +// SIMD-ONLY0-NEXT: [[CONV2326:%.*]] = trunc i32 [[COND2325]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2326]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP960:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2327:%.*]] = sext i16 [[TMP960]] to i32 +// SIMD-ONLY0-NEXT: [[TMP961:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2328:%.*]] = sext i16 [[TMP961]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2329:%.*]] = icmp eq i32 [[CONV2327]], [[CONV2328]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2329]], label [[IF_THEN2331:%.*]], label [[IF_END2332:%.*]] +// SIMD-ONLY0: if.then2331: +// SIMD-ONLY0-NEXT: [[TMP962:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP962]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2332]] +// SIMD-ONLY0: if.end2332: +// SIMD-ONLY0-NEXT: [[TMP963:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2333:%.*]] = sext i16 [[TMP963]] to i32 +// SIMD-ONLY0-NEXT: [[TMP964:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2334:%.*]] = sext i16 [[TMP964]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2335:%.*]] = icmp eq i32 [[CONV2333]], [[CONV2334]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2335]], label [[IF_THEN2337:%.*]], label [[IF_END2338:%.*]] +// SIMD-ONLY0: if.then2337: +// SIMD-ONLY0-NEXT: [[TMP965:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP965]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2338]] +// SIMD-ONLY0: if.end2338: +// SIMD-ONLY0-NEXT: [[TMP966:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2339:%.*]] = zext i16 [[TMP966]] to i32 +// SIMD-ONLY0-NEXT: [[TMP967:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2340:%.*]] = zext i16 [[TMP967]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2341:%.*]] = 
icmp sgt i32 [[CONV2339]], [[CONV2340]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2341]], label [[COND_TRUE2343:%.*]], label [[COND_FALSE2345:%.*]] +// SIMD-ONLY0: cond.true2343: +// SIMD-ONLY0-NEXT: [[TMP968:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2344:%.*]] = zext i16 [[TMP968]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2347:%.*]] +// SIMD-ONLY0: cond.false2345: +// SIMD-ONLY0-NEXT: [[TMP969:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2346:%.*]] = zext i16 [[TMP969]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2347]] +// SIMD-ONLY0: cond.end2347: +// SIMD-ONLY0-NEXT: [[COND2348:%.*]] = phi i32 [ [[CONV2344]], [[COND_TRUE2343]] ], [ [[CONV2346]], [[COND_FALSE2345]] ] +// SIMD-ONLY0-NEXT: [[CONV2349:%.*]] = trunc i32 [[COND2348]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2349]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP970:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2350:%.*]] = zext i16 [[TMP970]] to i32 +// SIMD-ONLY0-NEXT: [[TMP971:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2351:%.*]] = zext i16 [[TMP971]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2352:%.*]] = icmp slt i32 [[CONV2350]], [[CONV2351]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2352]], label [[COND_TRUE2354:%.*]], label [[COND_FALSE2356:%.*]] +// SIMD-ONLY0: cond.true2354: +// SIMD-ONLY0-NEXT: [[TMP972:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2355:%.*]] = zext i16 [[TMP972]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2358:%.*]] +// SIMD-ONLY0: cond.false2356: +// SIMD-ONLY0-NEXT: [[TMP973:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2357:%.*]] = zext i16 [[TMP973]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2358]] +// SIMD-ONLY0: cond.end2358: +// SIMD-ONLY0-NEXT: [[COND2359:%.*]] = phi i32 [ [[CONV2355]], [[COND_TRUE2354]] ], [ [[CONV2357]], [[COND_FALSE2356]] ] +// SIMD-ONLY0-NEXT: [[CONV2360:%.*]] = trunc i32 [[COND2359]] to i16 +// SIMD-ONLY0-NEXT: store i16 
[[CONV2360]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP974:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2361:%.*]] = zext i16 [[TMP974]] to i32 +// SIMD-ONLY0-NEXT: [[TMP975:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2362:%.*]] = zext i16 [[TMP975]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2363:%.*]] = icmp sgt i32 [[CONV2361]], [[CONV2362]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2363]], label [[COND_TRUE2365:%.*]], label [[COND_FALSE2367:%.*]] +// SIMD-ONLY0: cond.true2365: +// SIMD-ONLY0-NEXT: [[TMP976:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2366:%.*]] = zext i16 [[TMP976]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2369:%.*]] +// SIMD-ONLY0: cond.false2367: +// SIMD-ONLY0-NEXT: [[TMP977:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2368:%.*]] = zext i16 [[TMP977]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2369]] +// SIMD-ONLY0: cond.end2369: +// SIMD-ONLY0-NEXT: [[COND2370:%.*]] = phi i32 [ [[CONV2366]], [[COND_TRUE2365]] ], [ [[CONV2368]], [[COND_FALSE2367]] ] +// SIMD-ONLY0-NEXT: [[CONV2371:%.*]] = trunc i32 [[COND2370]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2371]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP978:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2372:%.*]] = zext i16 [[TMP978]] to i32 +// SIMD-ONLY0-NEXT: [[TMP979:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2373:%.*]] = zext i16 [[TMP979]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2374:%.*]] = icmp slt i32 [[CONV2372]], [[CONV2373]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2374]], label [[COND_TRUE2376:%.*]], label [[COND_FALSE2378:%.*]] +// SIMD-ONLY0: cond.true2376: +// SIMD-ONLY0-NEXT: [[TMP980:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2377:%.*]] = zext i16 [[TMP980]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2380:%.*]] +// SIMD-ONLY0: cond.false2378: +// SIMD-ONLY0-NEXT: [[TMP981:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV2379:%.*]] = zext i16 [[TMP981]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2380]] +// SIMD-ONLY0: cond.end2380: +// SIMD-ONLY0-NEXT: [[COND2381:%.*]] = phi i32 [ [[CONV2377]], [[COND_TRUE2376]] ], [ [[CONV2379]], [[COND_FALSE2378]] ] +// SIMD-ONLY0-NEXT: [[CONV2382:%.*]] = trunc i32 [[COND2381]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2382]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP982:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2383:%.*]] = zext i16 [[TMP982]] to i32 +// SIMD-ONLY0-NEXT: [[TMP983:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2384:%.*]] = zext i16 [[TMP983]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2385:%.*]] = icmp sgt i32 [[CONV2383]], [[CONV2384]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2385]], label [[IF_THEN2387:%.*]], label [[IF_END2388:%.*]] +// SIMD-ONLY0: if.then2387: +// SIMD-ONLY0-NEXT: [[TMP984:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP984]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2388]] +// SIMD-ONLY0: if.end2388: +// SIMD-ONLY0-NEXT: [[TMP985:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2389:%.*]] = zext i16 [[TMP985]] to i32 +// SIMD-ONLY0-NEXT: [[TMP986:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2390:%.*]] = zext i16 [[TMP986]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2391:%.*]] = icmp slt i32 [[CONV2389]], [[CONV2390]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2391]], label [[IF_THEN2393:%.*]], label [[IF_END2394:%.*]] +// SIMD-ONLY0: if.then2393: +// SIMD-ONLY0-NEXT: [[TMP987:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP987]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2394]] +// SIMD-ONLY0: if.end2394: +// SIMD-ONLY0-NEXT: [[TMP988:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2395:%.*]] = zext i16 [[TMP988]] to i32 +// SIMD-ONLY0-NEXT: [[TMP989:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2396:%.*]] = zext i16 
[[TMP989]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2397:%.*]] = icmp sgt i32 [[CONV2395]], [[CONV2396]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2397]], label [[IF_THEN2399:%.*]], label [[IF_END2400:%.*]] +// SIMD-ONLY0: if.then2399: +// SIMD-ONLY0-NEXT: [[TMP990:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP990]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2400]] +// SIMD-ONLY0: if.end2400: +// SIMD-ONLY0-NEXT: [[TMP991:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2401:%.*]] = zext i16 [[TMP991]] to i32 +// SIMD-ONLY0-NEXT: [[TMP992:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2402:%.*]] = zext i16 [[TMP992]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2403:%.*]] = icmp slt i32 [[CONV2401]], [[CONV2402]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2403]], label [[IF_THEN2405:%.*]], label [[IF_END2406:%.*]] +// SIMD-ONLY0: if.then2405: +// SIMD-ONLY0-NEXT: [[TMP993:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP993]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2406]] +// SIMD-ONLY0: if.end2406: +// SIMD-ONLY0-NEXT: [[TMP994:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2407:%.*]] = zext i16 [[TMP994]] to i32 +// SIMD-ONLY0-NEXT: [[TMP995:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2408:%.*]] = zext i16 [[TMP995]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2409:%.*]] = icmp eq i32 [[CONV2407]], [[CONV2408]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2409]], label [[COND_TRUE2411:%.*]], label [[COND_FALSE2413:%.*]] +// SIMD-ONLY0: cond.true2411: +// SIMD-ONLY0-NEXT: [[TMP996:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2412:%.*]] = zext i16 [[TMP996]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2415:%.*]] +// SIMD-ONLY0: cond.false2413: +// SIMD-ONLY0-NEXT: [[TMP997:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2414:%.*]] = zext i16 [[TMP997]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2415]] +// SIMD-ONLY0: 
cond.end2415: +// SIMD-ONLY0-NEXT: [[COND2416:%.*]] = phi i32 [ [[CONV2412]], [[COND_TRUE2411]] ], [ [[CONV2414]], [[COND_FALSE2413]] ] +// SIMD-ONLY0-NEXT: [[CONV2417:%.*]] = trunc i32 [[COND2416]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2417]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP998:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2418:%.*]] = zext i16 [[TMP998]] to i32 +// SIMD-ONLY0-NEXT: [[TMP999:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2419:%.*]] = zext i16 [[TMP999]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2420:%.*]] = icmp eq i32 [[CONV2418]], [[CONV2419]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2420]], label [[COND_TRUE2422:%.*]], label [[COND_FALSE2424:%.*]] +// SIMD-ONLY0: cond.true2422: +// SIMD-ONLY0-NEXT: [[TMP1000:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2423:%.*]] = zext i16 [[TMP1000]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2426:%.*]] +// SIMD-ONLY0: cond.false2424: +// SIMD-ONLY0-NEXT: [[TMP1001:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2425:%.*]] = zext i16 [[TMP1001]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END2426]] +// SIMD-ONLY0: cond.end2426: +// SIMD-ONLY0-NEXT: [[COND2427:%.*]] = phi i32 [ [[CONV2423]], [[COND_TRUE2422]] ], [ [[CONV2425]], [[COND_FALSE2424]] ] +// SIMD-ONLY0-NEXT: [[CONV2428:%.*]] = trunc i32 [[COND2427]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2428]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1002:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2429:%.*]] = zext i16 [[TMP1002]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1003:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2430:%.*]] = zext i16 [[TMP1003]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2431:%.*]] = icmp eq i32 [[CONV2429]], [[CONV2430]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2431]], label [[IF_THEN2433:%.*]], label [[IF_END2434:%.*]] +// SIMD-ONLY0: if.then2433: +// SIMD-ONLY0-NEXT: [[TMP1004:%.*]] = load i16, ptr [[USD]], align 2 +// 
SIMD-ONLY0-NEXT: store i16 [[TMP1004]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2434]] +// SIMD-ONLY0: if.end2434: +// SIMD-ONLY0-NEXT: [[TMP1005:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2435:%.*]] = zext i16 [[TMP1005]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1006:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2436:%.*]] = zext i16 [[TMP1006]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2437:%.*]] = icmp eq i32 [[CONV2435]], [[CONV2436]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2437]], label [[IF_THEN2439:%.*]], label [[IF_END2440:%.*]] +// SIMD-ONLY0: if.then2439: +// SIMD-ONLY0-NEXT: [[TMP1007:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1007]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2440]] +// SIMD-ONLY0: if.end2440: +// SIMD-ONLY0-NEXT: [[TMP1008:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1009:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2441:%.*]] = icmp sgt i32 [[TMP1008]], [[TMP1009]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2441]], label [[COND_TRUE2443:%.*]], label [[COND_FALSE2444:%.*]] +// SIMD-ONLY0: cond.true2443: +// SIMD-ONLY0-NEXT: [[TMP1010:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2445:%.*]] +// SIMD-ONLY0: cond.false2444: +// SIMD-ONLY0-NEXT: [[TMP1011:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2445]] +// SIMD-ONLY0: cond.end2445: +// SIMD-ONLY0-NEXT: [[COND2446:%.*]] = phi i32 [ [[TMP1010]], [[COND_TRUE2443]] ], [ [[TMP1011]], [[COND_FALSE2444]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2446]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1012:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1013:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2447:%.*]] = icmp slt i32 [[TMP1012]], [[TMP1013]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2447]], label [[COND_TRUE2449:%.*]], label [[COND_FALSE2450:%.*]] +// SIMD-ONLY0: cond.true2449: +// 
SIMD-ONLY0-NEXT: [[TMP1014:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2451:%.*]] +// SIMD-ONLY0: cond.false2450: +// SIMD-ONLY0-NEXT: [[TMP1015:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2451]] +// SIMD-ONLY0: cond.end2451: +// SIMD-ONLY0-NEXT: [[COND2452:%.*]] = phi i32 [ [[TMP1014]], [[COND_TRUE2449]] ], [ [[TMP1015]], [[COND_FALSE2450]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2452]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1016:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1017:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2453:%.*]] = icmp sgt i32 [[TMP1016]], [[TMP1017]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2453]], label [[COND_TRUE2455:%.*]], label [[COND_FALSE2456:%.*]] +// SIMD-ONLY0: cond.true2455: +// SIMD-ONLY0-NEXT: [[TMP1018:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2457:%.*]] +// SIMD-ONLY0: cond.false2456: +// SIMD-ONLY0-NEXT: [[TMP1019:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2457]] +// SIMD-ONLY0: cond.end2457: +// SIMD-ONLY0-NEXT: [[COND2458:%.*]] = phi i32 [ [[TMP1018]], [[COND_TRUE2455]] ], [ [[TMP1019]], [[COND_FALSE2456]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2458]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1020:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1021:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2459:%.*]] = icmp slt i32 [[TMP1020]], [[TMP1021]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2459]], label [[COND_TRUE2461:%.*]], label [[COND_FALSE2462:%.*]] +// SIMD-ONLY0: cond.true2461: +// SIMD-ONLY0-NEXT: [[TMP1022:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2463:%.*]] +// SIMD-ONLY0: cond.false2462: +// SIMD-ONLY0-NEXT: [[TMP1023:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2463]] +// SIMD-ONLY0: cond.end2463: +// SIMD-ONLY0-NEXT: [[COND2464:%.*]] = phi i32 [ 
[[TMP1022]], [[COND_TRUE2461]] ], [ [[TMP1023]], [[COND_FALSE2462]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2464]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1024:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1025:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2465:%.*]] = icmp sgt i32 [[TMP1024]], [[TMP1025]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2465]], label [[IF_THEN2467:%.*]], label [[IF_END2468:%.*]] +// SIMD-ONLY0: if.then2467: +// SIMD-ONLY0-NEXT: [[TMP1026:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1026]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2468]] +// SIMD-ONLY0: if.end2468: +// SIMD-ONLY0-NEXT: [[TMP1027:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1028:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2469:%.*]] = icmp slt i32 [[TMP1027]], [[TMP1028]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2469]], label [[IF_THEN2471:%.*]], label [[IF_END2472:%.*]] +// SIMD-ONLY0: if.then2471: +// SIMD-ONLY0-NEXT: [[TMP1029:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1029]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2472]] +// SIMD-ONLY0: if.end2472: +// SIMD-ONLY0-NEXT: [[TMP1030:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1031:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2473:%.*]] = icmp sgt i32 [[TMP1030]], [[TMP1031]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2473]], label [[IF_THEN2475:%.*]], label [[IF_END2476:%.*]] +// SIMD-ONLY0: if.then2475: +// SIMD-ONLY0-NEXT: [[TMP1032:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1032]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2476]] +// SIMD-ONLY0: if.end2476: +// SIMD-ONLY0-NEXT: [[TMP1033:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1034:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2477:%.*]] = icmp slt i32 [[TMP1033]], [[TMP1034]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP2477]], label [[IF_THEN2479:%.*]], label [[IF_END2480:%.*]] +// SIMD-ONLY0: if.then2479: +// SIMD-ONLY0-NEXT: [[TMP1035:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1035]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2480]] +// SIMD-ONLY0: if.end2480: +// SIMD-ONLY0-NEXT: [[TMP1036:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1037:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2481:%.*]] = icmp eq i32 [[TMP1036]], [[TMP1037]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2481]], label [[COND_TRUE2483:%.*]], label [[COND_FALSE2484:%.*]] +// SIMD-ONLY0: cond.true2483: +// SIMD-ONLY0-NEXT: [[TMP1038:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2485:%.*]] +// SIMD-ONLY0: cond.false2484: +// SIMD-ONLY0-NEXT: [[TMP1039:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2485]] +// SIMD-ONLY0: cond.end2485: +// SIMD-ONLY0-NEXT: [[COND2486:%.*]] = phi i32 [ [[TMP1038]], [[COND_TRUE2483]] ], [ [[TMP1039]], [[COND_FALSE2484]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2486]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1040:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1041:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2487:%.*]] = icmp eq i32 [[TMP1040]], [[TMP1041]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2487]], label [[COND_TRUE2489:%.*]], label [[COND_FALSE2490:%.*]] +// SIMD-ONLY0: cond.true2489: +// SIMD-ONLY0-NEXT: [[TMP1042:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2491:%.*]] +// SIMD-ONLY0: cond.false2490: +// SIMD-ONLY0-NEXT: [[TMP1043:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2491]] +// SIMD-ONLY0: cond.end2491: +// SIMD-ONLY0-NEXT: [[COND2492:%.*]] = phi i32 [ [[TMP1042]], [[COND_TRUE2489]] ], [ [[TMP1043]], [[COND_FALSE2490]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2492]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1044:%.*]] = load 
i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1045:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2493:%.*]] = icmp eq i32 [[TMP1044]], [[TMP1045]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2493]], label [[IF_THEN2495:%.*]], label [[IF_END2496:%.*]] +// SIMD-ONLY0: if.then2495: +// SIMD-ONLY0-NEXT: [[TMP1046:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1046]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2496]] +// SIMD-ONLY0: if.end2496: +// SIMD-ONLY0-NEXT: [[TMP1047:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1048:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2497:%.*]] = icmp eq i32 [[TMP1047]], [[TMP1048]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2497]], label [[IF_THEN2499:%.*]], label [[IF_END2500:%.*]] +// SIMD-ONLY0: if.then2499: +// SIMD-ONLY0-NEXT: [[TMP1049:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1049]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2500]] +// SIMD-ONLY0: if.end2500: +// SIMD-ONLY0-NEXT: [[TMP1050:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1051:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2501:%.*]] = icmp ugt i32 [[TMP1050]], [[TMP1051]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2501]], label [[COND_TRUE2503:%.*]], label [[COND_FALSE2504:%.*]] +// SIMD-ONLY0: cond.true2503: +// SIMD-ONLY0-NEXT: [[TMP1052:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2505:%.*]] +// SIMD-ONLY0: cond.false2504: +// SIMD-ONLY0-NEXT: [[TMP1053:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2505]] +// SIMD-ONLY0: cond.end2505: +// SIMD-ONLY0-NEXT: [[COND2506:%.*]] = phi i32 [ [[TMP1052]], [[COND_TRUE2503]] ], [ [[TMP1053]], [[COND_FALSE2504]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2506]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1054:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1055:%.*]] = load i32, ptr 
[[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2507:%.*]] = icmp ult i32 [[TMP1054]], [[TMP1055]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2507]], label [[COND_TRUE2509:%.*]], label [[COND_FALSE2510:%.*]] +// SIMD-ONLY0: cond.true2509: +// SIMD-ONLY0-NEXT: [[TMP1056:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2511:%.*]] +// SIMD-ONLY0: cond.false2510: +// SIMD-ONLY0-NEXT: [[TMP1057:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2511]] +// SIMD-ONLY0: cond.end2511: +// SIMD-ONLY0-NEXT: [[COND2512:%.*]] = phi i32 [ [[TMP1056]], [[COND_TRUE2509]] ], [ [[TMP1057]], [[COND_FALSE2510]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2512]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1058:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1059:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2513:%.*]] = icmp ugt i32 [[TMP1058]], [[TMP1059]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2513]], label [[COND_TRUE2515:%.*]], label [[COND_FALSE2516:%.*]] +// SIMD-ONLY0: cond.true2515: +// SIMD-ONLY0-NEXT: [[TMP1060:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2517:%.*]] +// SIMD-ONLY0: cond.false2516: +// SIMD-ONLY0-NEXT: [[TMP1061:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2517]] +// SIMD-ONLY0: cond.end2517: +// SIMD-ONLY0-NEXT: [[COND2518:%.*]] = phi i32 [ [[TMP1060]], [[COND_TRUE2515]] ], [ [[TMP1061]], [[COND_FALSE2516]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2518]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1062:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1063:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2519:%.*]] = icmp ult i32 [[TMP1062]], [[TMP1063]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2519]], label [[COND_TRUE2521:%.*]], label [[COND_FALSE2522:%.*]] +// SIMD-ONLY0: cond.true2521: +// SIMD-ONLY0-NEXT: [[TMP1064:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[COND_END2523:%.*]] +// SIMD-ONLY0: cond.false2522: +// SIMD-ONLY0-NEXT: [[TMP1065:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2523]] +// SIMD-ONLY0: cond.end2523: +// SIMD-ONLY0-NEXT: [[COND2524:%.*]] = phi i32 [ [[TMP1064]], [[COND_TRUE2521]] ], [ [[TMP1065]], [[COND_FALSE2522]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2524]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1066:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1067:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2525:%.*]] = icmp ugt i32 [[TMP1066]], [[TMP1067]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2525]], label [[IF_THEN2527:%.*]], label [[IF_END2528:%.*]] +// SIMD-ONLY0: if.then2527: +// SIMD-ONLY0-NEXT: [[TMP1068:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1068]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2528]] +// SIMD-ONLY0: if.end2528: +// SIMD-ONLY0-NEXT: [[TMP1069:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1070:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2529:%.*]] = icmp ult i32 [[TMP1069]], [[TMP1070]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2529]], label [[IF_THEN2531:%.*]], label [[IF_END2532:%.*]] +// SIMD-ONLY0: if.then2531: +// SIMD-ONLY0-NEXT: [[TMP1071:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1071]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2532]] +// SIMD-ONLY0: if.end2532: +// SIMD-ONLY0-NEXT: [[TMP1072:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1073:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2533:%.*]] = icmp ugt i32 [[TMP1072]], [[TMP1073]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2533]], label [[IF_THEN2535:%.*]], label [[IF_END2536:%.*]] +// SIMD-ONLY0: if.then2535: +// SIMD-ONLY0-NEXT: [[TMP1074:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1074]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2536]] +// 
SIMD-ONLY0: if.end2536: +// SIMD-ONLY0-NEXT: [[TMP1075:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1076:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2537:%.*]] = icmp ult i32 [[TMP1075]], [[TMP1076]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2537]], label [[IF_THEN2539:%.*]], label [[IF_END2540:%.*]] +// SIMD-ONLY0: if.then2539: +// SIMD-ONLY0-NEXT: [[TMP1077:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1077]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2540]] +// SIMD-ONLY0: if.end2540: +// SIMD-ONLY0-NEXT: [[TMP1078:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1079:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2541:%.*]] = icmp eq i32 [[TMP1078]], [[TMP1079]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2541]], label [[COND_TRUE2543:%.*]], label [[COND_FALSE2544:%.*]] +// SIMD-ONLY0: cond.true2543: +// SIMD-ONLY0-NEXT: [[TMP1080:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2545:%.*]] +// SIMD-ONLY0: cond.false2544: +// SIMD-ONLY0-NEXT: [[TMP1081:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2545]] +// SIMD-ONLY0: cond.end2545: +// SIMD-ONLY0-NEXT: [[COND2546:%.*]] = phi i32 [ [[TMP1080]], [[COND_TRUE2543]] ], [ [[TMP1081]], [[COND_FALSE2544]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2546]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1082:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1083:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2547:%.*]] = icmp eq i32 [[TMP1082]], [[TMP1083]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2547]], label [[COND_TRUE2549:%.*]], label [[COND_FALSE2550:%.*]] +// SIMD-ONLY0: cond.true2549: +// SIMD-ONLY0-NEXT: [[TMP1084:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2551:%.*]] +// SIMD-ONLY0: cond.false2550: +// SIMD-ONLY0-NEXT: [[TMP1085:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[COND_END2551]] +// SIMD-ONLY0: cond.end2551: +// SIMD-ONLY0-NEXT: [[COND2552:%.*]] = phi i32 [ [[TMP1084]], [[COND_TRUE2549]] ], [ [[TMP1085]], [[COND_FALSE2550]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2552]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1086:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1087:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2553:%.*]] = icmp eq i32 [[TMP1086]], [[TMP1087]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2553]], label [[IF_THEN2555:%.*]], label [[IF_END2556:%.*]] +// SIMD-ONLY0: if.then2555: +// SIMD-ONLY0-NEXT: [[TMP1088:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1088]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2556]] +// SIMD-ONLY0: if.end2556: +// SIMD-ONLY0-NEXT: [[TMP1089:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1090:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2557:%.*]] = icmp eq i32 [[TMP1089]], [[TMP1090]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2557]], label [[IF_THEN2559:%.*]], label [[IF_END2560:%.*]] +// SIMD-ONLY0: if.then2559: +// SIMD-ONLY0-NEXT: [[TMP1091:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1091]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2560]] +// SIMD-ONLY0: if.end2560: +// SIMD-ONLY0-NEXT: [[TMP1092:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1093:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2561:%.*]] = icmp sgt i32 [[TMP1092]], [[TMP1093]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2561]], label [[COND_TRUE2563:%.*]], label [[COND_FALSE2564:%.*]] +// SIMD-ONLY0: cond.true2563: +// SIMD-ONLY0-NEXT: [[TMP1094:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2565:%.*]] +// SIMD-ONLY0: cond.false2564: +// SIMD-ONLY0-NEXT: [[TMP1095:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2565]] +// SIMD-ONLY0: cond.end2565: +// SIMD-ONLY0-NEXT: 
[[COND2566:%.*]] = phi i32 [ [[TMP1094]], [[COND_TRUE2563]] ], [ [[TMP1095]], [[COND_FALSE2564]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2566]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1096:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1097:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2567:%.*]] = icmp slt i32 [[TMP1096]], [[TMP1097]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2567]], label [[COND_TRUE2569:%.*]], label [[COND_FALSE2570:%.*]] +// SIMD-ONLY0: cond.true2569: +// SIMD-ONLY0-NEXT: [[TMP1098:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2571:%.*]] +// SIMD-ONLY0: cond.false2570: +// SIMD-ONLY0-NEXT: [[TMP1099:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2571]] +// SIMD-ONLY0: cond.end2571: +// SIMD-ONLY0-NEXT: [[COND2572:%.*]] = phi i32 [ [[TMP1098]], [[COND_TRUE2569]] ], [ [[TMP1099]], [[COND_FALSE2570]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2572]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1100:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1101:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2573:%.*]] = icmp sgt i32 [[TMP1100]], [[TMP1101]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2573]], label [[COND_TRUE2575:%.*]], label [[COND_FALSE2576:%.*]] +// SIMD-ONLY0: cond.true2575: +// SIMD-ONLY0-NEXT: [[TMP1102:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2577:%.*]] +// SIMD-ONLY0: cond.false2576: +// SIMD-ONLY0-NEXT: [[TMP1103:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2577]] +// SIMD-ONLY0: cond.end2577: +// SIMD-ONLY0-NEXT: [[COND2578:%.*]] = phi i32 [ [[TMP1102]], [[COND_TRUE2575]] ], [ [[TMP1103]], [[COND_FALSE2576]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2578]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1104:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1105:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2579:%.*]] = icmp 
slt i32 [[TMP1104]], [[TMP1105]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2579]], label [[COND_TRUE2581:%.*]], label [[COND_FALSE2582:%.*]] +// SIMD-ONLY0: cond.true2581: +// SIMD-ONLY0-NEXT: [[TMP1106:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2583:%.*]] +// SIMD-ONLY0: cond.false2582: +// SIMD-ONLY0-NEXT: [[TMP1107:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2583]] +// SIMD-ONLY0: cond.end2583: +// SIMD-ONLY0-NEXT: [[COND2584:%.*]] = phi i32 [ [[TMP1106]], [[COND_TRUE2581]] ], [ [[TMP1107]], [[COND_FALSE2582]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2584]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1108:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1109:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2585:%.*]] = icmp sgt i32 [[TMP1108]], [[TMP1109]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2585]], label [[IF_THEN2587:%.*]], label [[IF_END2588:%.*]] +// SIMD-ONLY0: if.then2587: +// SIMD-ONLY0-NEXT: [[TMP1110:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1110]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2588]] +// SIMD-ONLY0: if.end2588: +// SIMD-ONLY0-NEXT: [[TMP1111:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1112:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2589:%.*]] = icmp slt i32 [[TMP1111]], [[TMP1112]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2589]], label [[IF_THEN2591:%.*]], label [[IF_END2592:%.*]] +// SIMD-ONLY0: if.then2591: +// SIMD-ONLY0-NEXT: [[TMP1113:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1113]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2592]] +// SIMD-ONLY0: if.end2592: +// SIMD-ONLY0-NEXT: [[TMP1114:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1115:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2593:%.*]] = icmp sgt i32 [[TMP1114]], [[TMP1115]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2593]], label 
[[IF_THEN2595:%.*]], label [[IF_END2596:%.*]] +// SIMD-ONLY0: if.then2595: +// SIMD-ONLY0-NEXT: [[TMP1116:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1116]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2596]] +// SIMD-ONLY0: if.end2596: +// SIMD-ONLY0-NEXT: [[TMP1117:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1118:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2597:%.*]] = icmp slt i32 [[TMP1117]], [[TMP1118]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2597]], label [[IF_THEN2599:%.*]], label [[IF_END2600:%.*]] +// SIMD-ONLY0: if.then2599: +// SIMD-ONLY0-NEXT: [[TMP1119:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1119]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2600]] +// SIMD-ONLY0: if.end2600: +// SIMD-ONLY0-NEXT: [[TMP1120:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1121:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2601:%.*]] = icmp eq i32 [[TMP1120]], [[TMP1121]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2601]], label [[COND_TRUE2603:%.*]], label [[COND_FALSE2604:%.*]] +// SIMD-ONLY0: cond.true2603: +// SIMD-ONLY0-NEXT: [[TMP1122:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2605:%.*]] +// SIMD-ONLY0: cond.false2604: +// SIMD-ONLY0-NEXT: [[TMP1123:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2605]] +// SIMD-ONLY0: cond.end2605: +// SIMD-ONLY0-NEXT: [[COND2606:%.*]] = phi i32 [ [[TMP1122]], [[COND_TRUE2603]] ], [ [[TMP1123]], [[COND_FALSE2604]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2606]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1124:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1125:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2607:%.*]] = icmp eq i32 [[TMP1124]], [[TMP1125]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2607]], label [[COND_TRUE2609:%.*]], label [[COND_FALSE2610:%.*]] +// SIMD-ONLY0: cond.true2609: +// 
SIMD-ONLY0-NEXT: [[TMP1126:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2611:%.*]] +// SIMD-ONLY0: cond.false2610: +// SIMD-ONLY0-NEXT: [[TMP1127:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2611]] +// SIMD-ONLY0: cond.end2611: +// SIMD-ONLY0-NEXT: [[COND2612:%.*]] = phi i32 [ [[TMP1126]], [[COND_TRUE2609]] ], [ [[TMP1127]], [[COND_FALSE2610]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2612]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1128:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1129:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2613:%.*]] = icmp eq i32 [[TMP1128]], [[TMP1129]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2613]], label [[IF_THEN2615:%.*]], label [[IF_END2616:%.*]] +// SIMD-ONLY0: if.then2615: +// SIMD-ONLY0-NEXT: [[TMP1130:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1130]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2616]] +// SIMD-ONLY0: if.end2616: +// SIMD-ONLY0-NEXT: [[TMP1131:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1132:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2617:%.*]] = icmp eq i32 [[TMP1131]], [[TMP1132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2617]], label [[IF_THEN2619:%.*]], label [[IF_END2620:%.*]] +// SIMD-ONLY0: if.then2619: +// SIMD-ONLY0-NEXT: [[TMP1133:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1133]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2620]] +// SIMD-ONLY0: if.end2620: +// SIMD-ONLY0-NEXT: [[TMP1134:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1135:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2621:%.*]] = icmp ugt i32 [[TMP1134]], [[TMP1135]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2621]], label [[COND_TRUE2623:%.*]], label [[COND_FALSE2624:%.*]] +// SIMD-ONLY0: cond.true2623: +// SIMD-ONLY0-NEXT: [[TMP1136:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: 
br label [[COND_END2625:%.*]] +// SIMD-ONLY0: cond.false2624: +// SIMD-ONLY0-NEXT: [[TMP1137:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2625]] +// SIMD-ONLY0: cond.end2625: +// SIMD-ONLY0-NEXT: [[COND2626:%.*]] = phi i32 [ [[TMP1136]], [[COND_TRUE2623]] ], [ [[TMP1137]], [[COND_FALSE2624]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2626]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1138:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1139:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2627:%.*]] = icmp ult i32 [[TMP1138]], [[TMP1139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2627]], label [[COND_TRUE2629:%.*]], label [[COND_FALSE2630:%.*]] +// SIMD-ONLY0: cond.true2629: +// SIMD-ONLY0-NEXT: [[TMP1140:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2631:%.*]] +// SIMD-ONLY0: cond.false2630: +// SIMD-ONLY0-NEXT: [[TMP1141:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2631]] +// SIMD-ONLY0: cond.end2631: +// SIMD-ONLY0-NEXT: [[COND2632:%.*]] = phi i32 [ [[TMP1140]], [[COND_TRUE2629]] ], [ [[TMP1141]], [[COND_FALSE2630]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2632]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1142:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1143:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2633:%.*]] = icmp ugt i32 [[TMP1142]], [[TMP1143]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2633]], label [[COND_TRUE2635:%.*]], label [[COND_FALSE2636:%.*]] +// SIMD-ONLY0: cond.true2635: +// SIMD-ONLY0-NEXT: [[TMP1144:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2637:%.*]] +// SIMD-ONLY0: cond.false2636: +// SIMD-ONLY0-NEXT: [[TMP1145:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2637]] +// SIMD-ONLY0: cond.end2637: +// SIMD-ONLY0-NEXT: [[COND2638:%.*]] = phi i32 [ [[TMP1144]], [[COND_TRUE2635]] ], [ [[TMP1145]], [[COND_FALSE2636]] ] +// 
SIMD-ONLY0-NEXT: store i32 [[COND2638]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1146:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1147:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2639:%.*]] = icmp ult i32 [[TMP1146]], [[TMP1147]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2639]], label [[COND_TRUE2641:%.*]], label [[COND_FALSE2642:%.*]] +// SIMD-ONLY0: cond.true2641: +// SIMD-ONLY0-NEXT: [[TMP1148:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2643:%.*]] +// SIMD-ONLY0: cond.false2642: +// SIMD-ONLY0-NEXT: [[TMP1149:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2643]] +// SIMD-ONLY0: cond.end2643: +// SIMD-ONLY0-NEXT: [[COND2644:%.*]] = phi i32 [ [[TMP1148]], [[COND_TRUE2641]] ], [ [[TMP1149]], [[COND_FALSE2642]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2644]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1150:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1151:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2645:%.*]] = icmp ugt i32 [[TMP1150]], [[TMP1151]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2645]], label [[IF_THEN2647:%.*]], label [[IF_END2648:%.*]] +// SIMD-ONLY0: if.then2647: +// SIMD-ONLY0-NEXT: [[TMP1152:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1152]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2648]] +// SIMD-ONLY0: if.end2648: +// SIMD-ONLY0-NEXT: [[TMP1153:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1154:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2649:%.*]] = icmp ult i32 [[TMP1153]], [[TMP1154]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2649]], label [[IF_THEN2651:%.*]], label [[IF_END2652:%.*]] +// SIMD-ONLY0: if.then2651: +// SIMD-ONLY0-NEXT: [[TMP1155:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1155]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2652]] +// SIMD-ONLY0: if.end2652: +// 
SIMD-ONLY0-NEXT: [[TMP1156:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1157:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2653:%.*]] = icmp ugt i32 [[TMP1156]], [[TMP1157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2653]], label [[IF_THEN2655:%.*]], label [[IF_END2656:%.*]] +// SIMD-ONLY0: if.then2655: +// SIMD-ONLY0-NEXT: [[TMP1158:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1158]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2656]] +// SIMD-ONLY0: if.end2656: +// SIMD-ONLY0-NEXT: [[TMP1159:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1160:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2657:%.*]] = icmp ult i32 [[TMP1159]], [[TMP1160]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2657]], label [[IF_THEN2659:%.*]], label [[IF_END2660:%.*]] +// SIMD-ONLY0: if.then2659: +// SIMD-ONLY0-NEXT: [[TMP1161:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1161]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2660]] +// SIMD-ONLY0: if.end2660: +// SIMD-ONLY0-NEXT: [[TMP1162:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1163:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2661:%.*]] = icmp eq i32 [[TMP1162]], [[TMP1163]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2661]], label [[COND_TRUE2663:%.*]], label [[COND_FALSE2664:%.*]] +// SIMD-ONLY0: cond.true2663: +// SIMD-ONLY0-NEXT: [[TMP1164:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2665:%.*]] +// SIMD-ONLY0: cond.false2664: +// SIMD-ONLY0-NEXT: [[TMP1165:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2665]] +// SIMD-ONLY0: cond.end2665: +// SIMD-ONLY0-NEXT: [[COND2666:%.*]] = phi i32 [ [[TMP1164]], [[COND_TRUE2663]] ], [ [[TMP1165]], [[COND_FALSE2664]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2666]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1166:%.*]] = load i32, ptr [[UIE]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP1167:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2667:%.*]] = icmp eq i32 [[TMP1166]], [[TMP1167]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2667]], label [[COND_TRUE2669:%.*]], label [[COND_FALSE2670:%.*]] +// SIMD-ONLY0: cond.true2669: +// SIMD-ONLY0-NEXT: [[TMP1168:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2671:%.*]] +// SIMD-ONLY0: cond.false2670: +// SIMD-ONLY0-NEXT: [[TMP1169:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2671]] +// SIMD-ONLY0: cond.end2671: +// SIMD-ONLY0-NEXT: [[COND2672:%.*]] = phi i32 [ [[TMP1168]], [[COND_TRUE2669]] ], [ [[TMP1169]], [[COND_FALSE2670]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2672]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1170:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1171:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2673:%.*]] = icmp eq i32 [[TMP1170]], [[TMP1171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2673]], label [[IF_THEN2675:%.*]], label [[IF_END2676:%.*]] +// SIMD-ONLY0: if.then2675: +// SIMD-ONLY0-NEXT: [[TMP1172:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1172]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2676]] +// SIMD-ONLY0: if.end2676: +// SIMD-ONLY0-NEXT: [[TMP1173:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1174:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2677:%.*]] = icmp eq i32 [[TMP1173]], [[TMP1174]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2677]], label [[IF_THEN2679:%.*]], label [[IF_END2680:%.*]] +// SIMD-ONLY0: if.then2679: +// SIMD-ONLY0-NEXT: [[TMP1175:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1175]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2680]] +// SIMD-ONLY0: if.end2680: +// SIMD-ONLY0-NEXT: [[TMP1176:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1177:%.*]] = load i32, ptr [[IE]], align 4 +// 
SIMD-ONLY0-NEXT: [[CMP2681:%.*]] = icmp sgt i32 [[TMP1176]], [[TMP1177]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2681]], label [[COND_TRUE2683:%.*]], label [[COND_FALSE2684:%.*]] +// SIMD-ONLY0: cond.true2683: +// SIMD-ONLY0-NEXT: [[TMP1178:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2685:%.*]] +// SIMD-ONLY0: cond.false2684: +// SIMD-ONLY0-NEXT: [[TMP1179:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2685]] +// SIMD-ONLY0: cond.end2685: +// SIMD-ONLY0-NEXT: [[COND2686:%.*]] = phi i32 [ [[TMP1178]], [[COND_TRUE2683]] ], [ [[TMP1179]], [[COND_FALSE2684]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2686]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1180:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1181:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2687:%.*]] = icmp slt i32 [[TMP1180]], [[TMP1181]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2687]], label [[COND_TRUE2689:%.*]], label [[COND_FALSE2690:%.*]] +// SIMD-ONLY0: cond.true2689: +// SIMD-ONLY0-NEXT: [[TMP1182:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2691:%.*]] +// SIMD-ONLY0: cond.false2690: +// SIMD-ONLY0-NEXT: [[TMP1183:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2691]] +// SIMD-ONLY0: cond.end2691: +// SIMD-ONLY0-NEXT: [[COND2692:%.*]] = phi i32 [ [[TMP1182]], [[COND_TRUE2689]] ], [ [[TMP1183]], [[COND_FALSE2690]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2692]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1184:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1185:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2693:%.*]] = icmp sgt i32 [[TMP1184]], [[TMP1185]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2693]], label [[COND_TRUE2695:%.*]], label [[COND_FALSE2696:%.*]] +// SIMD-ONLY0: cond.true2695: +// SIMD-ONLY0-NEXT: [[TMP1186:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2697:%.*]] +// SIMD-ONLY0: 
cond.false2696: +// SIMD-ONLY0-NEXT: [[TMP1187:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2697]] +// SIMD-ONLY0: cond.end2697: +// SIMD-ONLY0-NEXT: [[COND2698:%.*]] = phi i32 [ [[TMP1186]], [[COND_TRUE2695]] ], [ [[TMP1187]], [[COND_FALSE2696]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2698]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1188:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1189:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2699:%.*]] = icmp slt i32 [[TMP1188]], [[TMP1189]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2699]], label [[COND_TRUE2701:%.*]], label [[COND_FALSE2702:%.*]] +// SIMD-ONLY0: cond.true2701: +// SIMD-ONLY0-NEXT: [[TMP1190:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2703:%.*]] +// SIMD-ONLY0: cond.false2702: +// SIMD-ONLY0-NEXT: [[TMP1191:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2703]] +// SIMD-ONLY0: cond.end2703: +// SIMD-ONLY0-NEXT: [[COND2704:%.*]] = phi i32 [ [[TMP1190]], [[COND_TRUE2701]] ], [ [[TMP1191]], [[COND_FALSE2702]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2704]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1192:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1193:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2705:%.*]] = icmp sgt i32 [[TMP1192]], [[TMP1193]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2705]], label [[IF_THEN2707:%.*]], label [[IF_END2708:%.*]] +// SIMD-ONLY0: if.then2707: +// SIMD-ONLY0-NEXT: [[TMP1194:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1194]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2708]] +// SIMD-ONLY0: if.end2708: +// SIMD-ONLY0-NEXT: [[TMP1195:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1196:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2709:%.*]] = icmp slt i32 [[TMP1195]], [[TMP1196]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2709]], label [[IF_THEN2711:%.*]], 
label [[IF_END2712:%.*]] +// SIMD-ONLY0: if.then2711: +// SIMD-ONLY0-NEXT: [[TMP1197:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1197]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2712]] +// SIMD-ONLY0: if.end2712: +// SIMD-ONLY0-NEXT: [[TMP1198:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1199:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2713:%.*]] = icmp sgt i32 [[TMP1198]], [[TMP1199]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2713]], label [[IF_THEN2715:%.*]], label [[IF_END2716:%.*]] +// SIMD-ONLY0: if.then2715: +// SIMD-ONLY0-NEXT: [[TMP1200:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1200]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2716]] +// SIMD-ONLY0: if.end2716: +// SIMD-ONLY0-NEXT: [[TMP1201:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1202:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2717:%.*]] = icmp slt i32 [[TMP1201]], [[TMP1202]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2717]], label [[IF_THEN2719:%.*]], label [[IF_END2720:%.*]] +// SIMD-ONLY0: if.then2719: +// SIMD-ONLY0-NEXT: [[TMP1203:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1203]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2720]] +// SIMD-ONLY0: if.end2720: +// SIMD-ONLY0-NEXT: [[TMP1204:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1205:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2721:%.*]] = icmp eq i32 [[TMP1204]], [[TMP1205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2721]], label [[COND_TRUE2723:%.*]], label [[COND_FALSE2724:%.*]] +// SIMD-ONLY0: cond.true2723: +// SIMD-ONLY0-NEXT: [[TMP1206:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2725:%.*]] +// SIMD-ONLY0: cond.false2724: +// SIMD-ONLY0-NEXT: [[TMP1207:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2725]] +// SIMD-ONLY0: cond.end2725: +// 
SIMD-ONLY0-NEXT: [[COND2726:%.*]] = phi i32 [ [[TMP1206]], [[COND_TRUE2723]] ], [ [[TMP1207]], [[COND_FALSE2724]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2726]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1208:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1209:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2727:%.*]] = icmp eq i32 [[TMP1208]], [[TMP1209]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2727]], label [[COND_TRUE2729:%.*]], label [[COND_FALSE2730:%.*]] +// SIMD-ONLY0: cond.true2729: +// SIMD-ONLY0-NEXT: [[TMP1210:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2731:%.*]] +// SIMD-ONLY0: cond.false2730: +// SIMD-ONLY0-NEXT: [[TMP1211:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2731]] +// SIMD-ONLY0: cond.end2731: +// SIMD-ONLY0-NEXT: [[COND2732:%.*]] = phi i32 [ [[TMP1210]], [[COND_TRUE2729]] ], [ [[TMP1211]], [[COND_FALSE2730]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2732]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1212:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1213:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2733:%.*]] = icmp eq i32 [[TMP1212]], [[TMP1213]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2733]], label [[IF_THEN2735:%.*]], label [[IF_END2736:%.*]] +// SIMD-ONLY0: if.then2735: +// SIMD-ONLY0-NEXT: [[TMP1214:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1214]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2736]] +// SIMD-ONLY0: if.end2736: +// SIMD-ONLY0-NEXT: [[TMP1215:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1216:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2737:%.*]] = icmp eq i32 [[TMP1215]], [[TMP1216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2737]], label [[IF_THEN2739:%.*]], label [[IF_END2740:%.*]] +// SIMD-ONLY0: if.then2739: +// SIMD-ONLY0-NEXT: [[TMP1217:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1217]], ptr 
[[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2740]] +// SIMD-ONLY0: if.end2740: +// SIMD-ONLY0-NEXT: [[TMP1218:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1219:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2741:%.*]] = icmp ugt i32 [[TMP1218]], [[TMP1219]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2741]], label [[COND_TRUE2743:%.*]], label [[COND_FALSE2744:%.*]] +// SIMD-ONLY0: cond.true2743: +// SIMD-ONLY0-NEXT: [[TMP1220:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2745:%.*]] +// SIMD-ONLY0: cond.false2744: +// SIMD-ONLY0-NEXT: [[TMP1221:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2745]] +// SIMD-ONLY0: cond.end2745: +// SIMD-ONLY0-NEXT: [[COND2746:%.*]] = phi i32 [ [[TMP1220]], [[COND_TRUE2743]] ], [ [[TMP1221]], [[COND_FALSE2744]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2746]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1222:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1223:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2747:%.*]] = icmp ult i32 [[TMP1222]], [[TMP1223]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2747]], label [[COND_TRUE2749:%.*]], label [[COND_FALSE2750:%.*]] +// SIMD-ONLY0: cond.true2749: +// SIMD-ONLY0-NEXT: [[TMP1224:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2751:%.*]] +// SIMD-ONLY0: cond.false2750: +// SIMD-ONLY0-NEXT: [[TMP1225:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2751]] +// SIMD-ONLY0: cond.end2751: +// SIMD-ONLY0-NEXT: [[COND2752:%.*]] = phi i32 [ [[TMP1224]], [[COND_TRUE2749]] ], [ [[TMP1225]], [[COND_FALSE2750]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2752]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1226:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1227:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2753:%.*]] = icmp ugt i32 [[TMP1226]], [[TMP1227]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP2753]], label [[COND_TRUE2755:%.*]], label [[COND_FALSE2756:%.*]] +// SIMD-ONLY0: cond.true2755: +// SIMD-ONLY0-NEXT: [[TMP1228:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2757:%.*]] +// SIMD-ONLY0: cond.false2756: +// SIMD-ONLY0-NEXT: [[TMP1229:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2757]] +// SIMD-ONLY0: cond.end2757: +// SIMD-ONLY0-NEXT: [[COND2758:%.*]] = phi i32 [ [[TMP1228]], [[COND_TRUE2755]] ], [ [[TMP1229]], [[COND_FALSE2756]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2758]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1230:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1231:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2759:%.*]] = icmp ult i32 [[TMP1230]], [[TMP1231]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2759]], label [[COND_TRUE2761:%.*]], label [[COND_FALSE2762:%.*]] +// SIMD-ONLY0: cond.true2761: +// SIMD-ONLY0-NEXT: [[TMP1232:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2763:%.*]] +// SIMD-ONLY0: cond.false2762: +// SIMD-ONLY0-NEXT: [[TMP1233:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2763]] +// SIMD-ONLY0: cond.end2763: +// SIMD-ONLY0-NEXT: [[COND2764:%.*]] = phi i32 [ [[TMP1232]], [[COND_TRUE2761]] ], [ [[TMP1233]], [[COND_FALSE2762]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2764]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1234:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1235:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2765:%.*]] = icmp ugt i32 [[TMP1234]], [[TMP1235]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2765]], label [[IF_THEN2767:%.*]], label [[IF_END2768:%.*]] +// SIMD-ONLY0: if.then2767: +// SIMD-ONLY0-NEXT: [[TMP1236:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1236]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2768]] +// SIMD-ONLY0: if.end2768: +// SIMD-ONLY0-NEXT: 
[[TMP1237:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1238:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2769:%.*]] = icmp ult i32 [[TMP1237]], [[TMP1238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2769]], label [[IF_THEN2771:%.*]], label [[IF_END2772:%.*]] +// SIMD-ONLY0: if.then2771: +// SIMD-ONLY0-NEXT: [[TMP1239:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1239]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2772]] +// SIMD-ONLY0: if.end2772: +// SIMD-ONLY0-NEXT: [[TMP1240:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1241:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2773:%.*]] = icmp ugt i32 [[TMP1240]], [[TMP1241]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2773]], label [[IF_THEN2775:%.*]], label [[IF_END2776:%.*]] +// SIMD-ONLY0: if.then2775: +// SIMD-ONLY0-NEXT: [[TMP1242:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1242]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2776]] +// SIMD-ONLY0: if.end2776: +// SIMD-ONLY0-NEXT: [[TMP1243:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1244:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2777:%.*]] = icmp ult i32 [[TMP1243]], [[TMP1244]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2777]], label [[IF_THEN2779:%.*]], label [[IF_END2780:%.*]] +// SIMD-ONLY0: if.then2779: +// SIMD-ONLY0-NEXT: [[TMP1245:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1245]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2780]] +// SIMD-ONLY0: if.end2780: +// SIMD-ONLY0-NEXT: [[TMP1246:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1247:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2781:%.*]] = icmp eq i32 [[TMP1246]], [[TMP1247]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2781]], label [[COND_TRUE2783:%.*]], label [[COND_FALSE2784:%.*]] +// SIMD-ONLY0: cond.true2783: +// SIMD-ONLY0-NEXT: [[TMP1248:%.*]] 
= load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2785:%.*]] +// SIMD-ONLY0: cond.false2784: +// SIMD-ONLY0-NEXT: [[TMP1249:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2785]] +// SIMD-ONLY0: cond.end2785: +// SIMD-ONLY0-NEXT: [[COND2786:%.*]] = phi i32 [ [[TMP1248]], [[COND_TRUE2783]] ], [ [[TMP1249]], [[COND_FALSE2784]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2786]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1250:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1251:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2787:%.*]] = icmp eq i32 [[TMP1250]], [[TMP1251]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2787]], label [[COND_TRUE2789:%.*]], label [[COND_FALSE2790:%.*]] +// SIMD-ONLY0: cond.true2789: +// SIMD-ONLY0-NEXT: [[TMP1252:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2791:%.*]] +// SIMD-ONLY0: cond.false2790: +// SIMD-ONLY0-NEXT: [[TMP1253:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2791]] +// SIMD-ONLY0: cond.end2791: +// SIMD-ONLY0-NEXT: [[COND2792:%.*]] = phi i32 [ [[TMP1252]], [[COND_TRUE2789]] ], [ [[TMP1253]], [[COND_FALSE2790]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2792]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1254:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1255:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2793:%.*]] = icmp eq i32 [[TMP1254]], [[TMP1255]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2793]], label [[IF_THEN2795:%.*]], label [[IF_END2796:%.*]] +// SIMD-ONLY0: if.then2795: +// SIMD-ONLY0-NEXT: [[TMP1256:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1256]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2796]] +// SIMD-ONLY0: if.end2796: +// SIMD-ONLY0-NEXT: [[TMP1257:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1258:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP2797:%.*]] = icmp eq i32 [[TMP1257]], [[TMP1258]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2797]], label [[IF_THEN2799:%.*]], label [[IF_END2800:%.*]] +// SIMD-ONLY0: if.then2799: +// SIMD-ONLY0-NEXT: [[TMP1259:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1259]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2800]] +// SIMD-ONLY0: if.end2800: +// SIMD-ONLY0-NEXT: [[TMP1260:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1261:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2801:%.*]] = icmp sgt i32 [[TMP1260]], [[TMP1261]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2801]], label [[COND_TRUE2803:%.*]], label [[COND_FALSE2804:%.*]] +// SIMD-ONLY0: cond.true2803: +// SIMD-ONLY0-NEXT: [[TMP1262:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2805:%.*]] +// SIMD-ONLY0: cond.false2804: +// SIMD-ONLY0-NEXT: [[TMP1263:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2805]] +// SIMD-ONLY0: cond.end2805: +// SIMD-ONLY0-NEXT: [[COND2806:%.*]] = phi i32 [ [[TMP1262]], [[COND_TRUE2803]] ], [ [[TMP1263]], [[COND_FALSE2804]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2806]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1264:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1265:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2807:%.*]] = icmp slt i32 [[TMP1264]], [[TMP1265]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2807]], label [[COND_TRUE2809:%.*]], label [[COND_FALSE2810:%.*]] +// SIMD-ONLY0: cond.true2809: +// SIMD-ONLY0-NEXT: [[TMP1266:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2811:%.*]] +// SIMD-ONLY0: cond.false2810: +// SIMD-ONLY0-NEXT: [[TMP1267:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2811]] +// SIMD-ONLY0: cond.end2811: +// SIMD-ONLY0-NEXT: [[COND2812:%.*]] = phi i32 [ [[TMP1266]], [[COND_TRUE2809]] ], [ [[TMP1267]], [[COND_FALSE2810]] ] +// SIMD-ONLY0-NEXT: store 
i32 [[COND2812]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1268:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1269:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2813:%.*]] = icmp sgt i32 [[TMP1268]], [[TMP1269]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2813]], label [[COND_TRUE2815:%.*]], label [[COND_FALSE2816:%.*]] +// SIMD-ONLY0: cond.true2815: +// SIMD-ONLY0-NEXT: [[TMP1270:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2817:%.*]] +// SIMD-ONLY0: cond.false2816: +// SIMD-ONLY0-NEXT: [[TMP1271:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2817]] +// SIMD-ONLY0: cond.end2817: +// SIMD-ONLY0-NEXT: [[COND2818:%.*]] = phi i32 [ [[TMP1270]], [[COND_TRUE2815]] ], [ [[TMP1271]], [[COND_FALSE2816]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2818]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1272:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1273:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2819:%.*]] = icmp slt i32 [[TMP1272]], [[TMP1273]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2819]], label [[COND_TRUE2821:%.*]], label [[COND_FALSE2822:%.*]] +// SIMD-ONLY0: cond.true2821: +// SIMD-ONLY0-NEXT: [[TMP1274:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2823:%.*]] +// SIMD-ONLY0: cond.false2822: +// SIMD-ONLY0-NEXT: [[TMP1275:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2823]] +// SIMD-ONLY0: cond.end2823: +// SIMD-ONLY0-NEXT: [[COND2824:%.*]] = phi i32 [ [[TMP1274]], [[COND_TRUE2821]] ], [ [[TMP1275]], [[COND_FALSE2822]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2824]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1276:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1277:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2825:%.*]] = icmp sgt i32 [[TMP1276]], [[TMP1277]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2825]], label [[IF_THEN2827:%.*]], label [[IF_END2828:%.*]] 
+// SIMD-ONLY0: if.then2827: +// SIMD-ONLY0-NEXT: [[TMP1278:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1278]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2828]] +// SIMD-ONLY0: if.end2828: +// SIMD-ONLY0-NEXT: [[TMP1279:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1280:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2829:%.*]] = icmp slt i32 [[TMP1279]], [[TMP1280]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2829]], label [[IF_THEN2831:%.*]], label [[IF_END2832:%.*]] +// SIMD-ONLY0: if.then2831: +// SIMD-ONLY0-NEXT: [[TMP1281:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1281]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2832]] +// SIMD-ONLY0: if.end2832: +// SIMD-ONLY0-NEXT: [[TMP1282:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1283:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2833:%.*]] = icmp sgt i32 [[TMP1282]], [[TMP1283]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2833]], label [[IF_THEN2835:%.*]], label [[IF_END2836:%.*]] +// SIMD-ONLY0: if.then2835: +// SIMD-ONLY0-NEXT: [[TMP1284:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1284]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2836]] +// SIMD-ONLY0: if.end2836: +// SIMD-ONLY0-NEXT: [[TMP1285:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1286:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2837:%.*]] = icmp slt i32 [[TMP1285]], [[TMP1286]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2837]], label [[IF_THEN2839:%.*]], label [[IF_END2840:%.*]] +// SIMD-ONLY0: if.then2839: +// SIMD-ONLY0-NEXT: [[TMP1287:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1287]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2840]] +// SIMD-ONLY0: if.end2840: +// SIMD-ONLY0-NEXT: [[TMP1288:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1289:%.*]] = load i32, ptr [[IE]], align 4 
+// SIMD-ONLY0-NEXT: [[CMP2841:%.*]] = icmp eq i32 [[TMP1288]], [[TMP1289]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2841]], label [[COND_TRUE2843:%.*]], label [[COND_FALSE2844:%.*]] +// SIMD-ONLY0: cond.true2843: +// SIMD-ONLY0-NEXT: [[TMP1290:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2845:%.*]] +// SIMD-ONLY0: cond.false2844: +// SIMD-ONLY0-NEXT: [[TMP1291:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2845]] +// SIMD-ONLY0: cond.end2845: +// SIMD-ONLY0-NEXT: [[COND2846:%.*]] = phi i32 [ [[TMP1290]], [[COND_TRUE2843]] ], [ [[TMP1291]], [[COND_FALSE2844]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2846]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1292:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1293:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2847:%.*]] = icmp eq i32 [[TMP1292]], [[TMP1293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2847]], label [[COND_TRUE2849:%.*]], label [[COND_FALSE2850:%.*]] +// SIMD-ONLY0: cond.true2849: +// SIMD-ONLY0-NEXT: [[TMP1294:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2851:%.*]] +// SIMD-ONLY0: cond.false2850: +// SIMD-ONLY0-NEXT: [[TMP1295:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2851]] +// SIMD-ONLY0: cond.end2851: +// SIMD-ONLY0-NEXT: [[COND2852:%.*]] = phi i32 [ [[TMP1294]], [[COND_TRUE2849]] ], [ [[TMP1295]], [[COND_FALSE2850]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2852]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1296:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1297:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2853:%.*]] = icmp eq i32 [[TMP1296]], [[TMP1297]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2853]], label [[IF_THEN2855:%.*]], label [[IF_END2856:%.*]] +// SIMD-ONLY0: if.then2855: +// SIMD-ONLY0-NEXT: [[TMP1298:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1298]], ptr [[IX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END2856]] +// SIMD-ONLY0: if.end2856: +// SIMD-ONLY0-NEXT: [[TMP1299:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1300:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2857:%.*]] = icmp eq i32 [[TMP1299]], [[TMP1300]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2857]], label [[IF_THEN2859:%.*]], label [[IF_END2860:%.*]] +// SIMD-ONLY0: if.then2859: +// SIMD-ONLY0-NEXT: [[TMP1301:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1301]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2860]] +// SIMD-ONLY0: if.end2860: +// SIMD-ONLY0-NEXT: [[TMP1302:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1303:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2861:%.*]] = icmp ugt i32 [[TMP1302]], [[TMP1303]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2861]], label [[COND_TRUE2863:%.*]], label [[COND_FALSE2864:%.*]] +// SIMD-ONLY0: cond.true2863: +// SIMD-ONLY0-NEXT: [[TMP1304:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2865:%.*]] +// SIMD-ONLY0: cond.false2864: +// SIMD-ONLY0-NEXT: [[TMP1305:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2865]] +// SIMD-ONLY0: cond.end2865: +// SIMD-ONLY0-NEXT: [[COND2866:%.*]] = phi i32 [ [[TMP1304]], [[COND_TRUE2863]] ], [ [[TMP1305]], [[COND_FALSE2864]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2866]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1306:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1307:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2867:%.*]] = icmp ult i32 [[TMP1306]], [[TMP1307]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2867]], label [[COND_TRUE2869:%.*]], label [[COND_FALSE2870:%.*]] +// SIMD-ONLY0: cond.true2869: +// SIMD-ONLY0-NEXT: [[TMP1308:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2871:%.*]] +// SIMD-ONLY0: cond.false2870: +// SIMD-ONLY0-NEXT: [[TMP1309:%.*]] = load i32, ptr 
[[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2871]] +// SIMD-ONLY0: cond.end2871: +// SIMD-ONLY0-NEXT: [[COND2872:%.*]] = phi i32 [ [[TMP1308]], [[COND_TRUE2869]] ], [ [[TMP1309]], [[COND_FALSE2870]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2872]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1310:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1311:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2873:%.*]] = icmp ugt i32 [[TMP1310]], [[TMP1311]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2873]], label [[COND_TRUE2875:%.*]], label [[COND_FALSE2876:%.*]] +// SIMD-ONLY0: cond.true2875: +// SIMD-ONLY0-NEXT: [[TMP1312:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2877:%.*]] +// SIMD-ONLY0: cond.false2876: +// SIMD-ONLY0-NEXT: [[TMP1313:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2877]] +// SIMD-ONLY0: cond.end2877: +// SIMD-ONLY0-NEXT: [[COND2878:%.*]] = phi i32 [ [[TMP1312]], [[COND_TRUE2875]] ], [ [[TMP1313]], [[COND_FALSE2876]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2878]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1314:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1315:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2879:%.*]] = icmp ult i32 [[TMP1314]], [[TMP1315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2879]], label [[COND_TRUE2881:%.*]], label [[COND_FALSE2882:%.*]] +// SIMD-ONLY0: cond.true2881: +// SIMD-ONLY0-NEXT: [[TMP1316:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2883:%.*]] +// SIMD-ONLY0: cond.false2882: +// SIMD-ONLY0-NEXT: [[TMP1317:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2883]] +// SIMD-ONLY0: cond.end2883: +// SIMD-ONLY0-NEXT: [[COND2884:%.*]] = phi i32 [ [[TMP1316]], [[COND_TRUE2881]] ], [ [[TMP1317]], [[COND_FALSE2882]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2884]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1318:%.*]] = load i32, ptr 
[[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1319:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2885:%.*]] = icmp ugt i32 [[TMP1318]], [[TMP1319]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2885]], label [[IF_THEN2887:%.*]], label [[IF_END2888:%.*]] +// SIMD-ONLY0: if.then2887: +// SIMD-ONLY0-NEXT: [[TMP1320:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1320]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2888]] +// SIMD-ONLY0: if.end2888: +// SIMD-ONLY0-NEXT: [[TMP1321:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1322:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2889:%.*]] = icmp ult i32 [[TMP1321]], [[TMP1322]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2889]], label [[IF_THEN2891:%.*]], label [[IF_END2892:%.*]] +// SIMD-ONLY0: if.then2891: +// SIMD-ONLY0-NEXT: [[TMP1323:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1323]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2892]] +// SIMD-ONLY0: if.end2892: +// SIMD-ONLY0-NEXT: [[TMP1324:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1325:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2893:%.*]] = icmp ugt i32 [[TMP1324]], [[TMP1325]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2893]], label [[IF_THEN2895:%.*]], label [[IF_END2896:%.*]] +// SIMD-ONLY0: if.then2895: +// SIMD-ONLY0-NEXT: [[TMP1326:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1326]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2896]] +// SIMD-ONLY0: if.end2896: +// SIMD-ONLY0-NEXT: [[TMP1327:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1328:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2897:%.*]] = icmp ult i32 [[TMP1327]], [[TMP1328]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2897]], label [[IF_THEN2899:%.*]], label [[IF_END2900:%.*]] +// SIMD-ONLY0: if.then2899: +// SIMD-ONLY0-NEXT: [[TMP1329:%.*]] = load i32, ptr [[UIE]], align 4 +// 
SIMD-ONLY0-NEXT: store i32 [[TMP1329]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2900]] +// SIMD-ONLY0: if.end2900: +// SIMD-ONLY0-NEXT: [[TMP1330:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1331:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2901:%.*]] = icmp eq i32 [[TMP1330]], [[TMP1331]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2901]], label [[COND_TRUE2903:%.*]], label [[COND_FALSE2904:%.*]] +// SIMD-ONLY0: cond.true2903: +// SIMD-ONLY0-NEXT: [[TMP1332:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2905:%.*]] +// SIMD-ONLY0: cond.false2904: +// SIMD-ONLY0-NEXT: [[TMP1333:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2905]] +// SIMD-ONLY0: cond.end2905: +// SIMD-ONLY0-NEXT: [[COND2906:%.*]] = phi i32 [ [[TMP1332]], [[COND_TRUE2903]] ], [ [[TMP1333]], [[COND_FALSE2904]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2906]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1334:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1335:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2907:%.*]] = icmp eq i32 [[TMP1334]], [[TMP1335]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2907]], label [[COND_TRUE2909:%.*]], label [[COND_FALSE2910:%.*]] +// SIMD-ONLY0: cond.true2909: +// SIMD-ONLY0-NEXT: [[TMP1336:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2911:%.*]] +// SIMD-ONLY0: cond.false2910: +// SIMD-ONLY0-NEXT: [[TMP1337:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2911]] +// SIMD-ONLY0: cond.end2911: +// SIMD-ONLY0-NEXT: [[COND2912:%.*]] = phi i32 [ [[TMP1336]], [[COND_TRUE2909]] ], [ [[TMP1337]], [[COND_FALSE2910]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2912]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1338:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1339:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2913:%.*]] = icmp eq i32 [[TMP1338]], 
[[TMP1339]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2913]], label [[IF_THEN2915:%.*]], label [[IF_END2916:%.*]] +// SIMD-ONLY0: if.then2915: +// SIMD-ONLY0-NEXT: [[TMP1340:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1340]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2916]] +// SIMD-ONLY0: if.end2916: +// SIMD-ONLY0-NEXT: [[TMP1341:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1342:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2917:%.*]] = icmp eq i32 [[TMP1341]], [[TMP1342]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2917]], label [[IF_THEN2919:%.*]], label [[IF_END2920:%.*]] +// SIMD-ONLY0: if.then2919: +// SIMD-ONLY0-NEXT: [[TMP1343:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1343]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2920]] +// SIMD-ONLY0: if.end2920: +// SIMD-ONLY0-NEXT: [[TMP1344:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1345:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2921:%.*]] = icmp sgt i32 [[TMP1344]], [[TMP1345]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2921]], label [[COND_TRUE2923:%.*]], label [[COND_FALSE2924:%.*]] +// SIMD-ONLY0: cond.true2923: +// SIMD-ONLY0-NEXT: [[TMP1346:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2925:%.*]] +// SIMD-ONLY0: cond.false2924: +// SIMD-ONLY0-NEXT: [[TMP1347:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2925]] +// SIMD-ONLY0: cond.end2925: +// SIMD-ONLY0-NEXT: [[COND2926:%.*]] = phi i32 [ [[TMP1346]], [[COND_TRUE2923]] ], [ [[TMP1347]], [[COND_FALSE2924]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2926]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1348:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1349:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2927:%.*]] = icmp slt i32 [[TMP1348]], [[TMP1349]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2927]], label [[COND_TRUE2929:%.*]], 
label [[COND_FALSE2930:%.*]] +// SIMD-ONLY0: cond.true2929: +// SIMD-ONLY0-NEXT: [[TMP1350:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2931:%.*]] +// SIMD-ONLY0: cond.false2930: +// SIMD-ONLY0-NEXT: [[TMP1351:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2931]] +// SIMD-ONLY0: cond.end2931: +// SIMD-ONLY0-NEXT: [[COND2932:%.*]] = phi i32 [ [[TMP1350]], [[COND_TRUE2929]] ], [ [[TMP1351]], [[COND_FALSE2930]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2932]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1352:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1353:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2933:%.*]] = icmp sgt i32 [[TMP1352]], [[TMP1353]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2933]], label [[COND_TRUE2935:%.*]], label [[COND_FALSE2936:%.*]] +// SIMD-ONLY0: cond.true2935: +// SIMD-ONLY0-NEXT: [[TMP1354:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2937:%.*]] +// SIMD-ONLY0: cond.false2936: +// SIMD-ONLY0-NEXT: [[TMP1355:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2937]] +// SIMD-ONLY0: cond.end2937: +// SIMD-ONLY0-NEXT: [[COND2938:%.*]] = phi i32 [ [[TMP1354]], [[COND_TRUE2935]] ], [ [[TMP1355]], [[COND_FALSE2936]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2938]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1356:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1357:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2939:%.*]] = icmp slt i32 [[TMP1356]], [[TMP1357]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2939]], label [[COND_TRUE2941:%.*]], label [[COND_FALSE2942:%.*]] +// SIMD-ONLY0: cond.true2941: +// SIMD-ONLY0-NEXT: [[TMP1358:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2943:%.*]] +// SIMD-ONLY0: cond.false2942: +// SIMD-ONLY0-NEXT: [[TMP1359:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2943]] +// SIMD-ONLY0: 
cond.end2943: +// SIMD-ONLY0-NEXT: [[COND2944:%.*]] = phi i32 [ [[TMP1358]], [[COND_TRUE2941]] ], [ [[TMP1359]], [[COND_FALSE2942]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2944]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1360:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1361:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2945:%.*]] = icmp sgt i32 [[TMP1360]], [[TMP1361]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2945]], label [[IF_THEN2947:%.*]], label [[IF_END2948:%.*]] +// SIMD-ONLY0: if.then2947: +// SIMD-ONLY0-NEXT: [[TMP1362:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1362]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2948]] +// SIMD-ONLY0: if.end2948: +// SIMD-ONLY0-NEXT: [[TMP1363:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1364:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2949:%.*]] = icmp slt i32 [[TMP1363]], [[TMP1364]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2949]], label [[IF_THEN2951:%.*]], label [[IF_END2952:%.*]] +// SIMD-ONLY0: if.then2951: +// SIMD-ONLY0-NEXT: [[TMP1365:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1365]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2952]] +// SIMD-ONLY0: if.end2952: +// SIMD-ONLY0-NEXT: [[TMP1366:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1367:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2953:%.*]] = icmp sgt i32 [[TMP1366]], [[TMP1367]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2953]], label [[IF_THEN2955:%.*]], label [[IF_END2956:%.*]] +// SIMD-ONLY0: if.then2955: +// SIMD-ONLY0-NEXT: [[TMP1368:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1368]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2956]] +// SIMD-ONLY0: if.end2956: +// SIMD-ONLY0-NEXT: [[TMP1369:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1370:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2957:%.*]] 
= icmp slt i32 [[TMP1369]], [[TMP1370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2957]], label [[IF_THEN2959:%.*]], label [[IF_END2960:%.*]] +// SIMD-ONLY0: if.then2959: +// SIMD-ONLY0-NEXT: [[TMP1371:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1371]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2960]] +// SIMD-ONLY0: if.end2960: +// SIMD-ONLY0-NEXT: [[TMP1372:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1373:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2961:%.*]] = icmp eq i32 [[TMP1372]], [[TMP1373]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2961]], label [[COND_TRUE2963:%.*]], label [[COND_FALSE2964:%.*]] +// SIMD-ONLY0: cond.true2963: +// SIMD-ONLY0-NEXT: [[TMP1374:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2965:%.*]] +// SIMD-ONLY0: cond.false2964: +// SIMD-ONLY0-NEXT: [[TMP1375:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2965]] +// SIMD-ONLY0: cond.end2965: +// SIMD-ONLY0-NEXT: [[COND2966:%.*]] = phi i32 [ [[TMP1374]], [[COND_TRUE2963]] ], [ [[TMP1375]], [[COND_FALSE2964]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2966]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1376:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1377:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2967:%.*]] = icmp eq i32 [[TMP1376]], [[TMP1377]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2967]], label [[COND_TRUE2969:%.*]], label [[COND_FALSE2970:%.*]] +// SIMD-ONLY0: cond.true2969: +// SIMD-ONLY0-NEXT: [[TMP1378:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2971:%.*]] +// SIMD-ONLY0: cond.false2970: +// SIMD-ONLY0-NEXT: [[TMP1379:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2971]] +// SIMD-ONLY0: cond.end2971: +// SIMD-ONLY0-NEXT: [[COND2972:%.*]] = phi i32 [ [[TMP1378]], [[COND_TRUE2969]] ], [ [[TMP1379]], [[COND_FALSE2970]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2972]], 
ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1380:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1381:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2973:%.*]] = icmp eq i32 [[TMP1380]], [[TMP1381]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2973]], label [[IF_THEN2975:%.*]], label [[IF_END2976:%.*]] +// SIMD-ONLY0: if.then2975: +// SIMD-ONLY0-NEXT: [[TMP1382:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1382]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2976]] +// SIMD-ONLY0: if.end2976: +// SIMD-ONLY0-NEXT: [[TMP1383:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1384:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2977:%.*]] = icmp eq i32 [[TMP1383]], [[TMP1384]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2977]], label [[IF_THEN2979:%.*]], label [[IF_END2980:%.*]] +// SIMD-ONLY0: if.then2979: +// SIMD-ONLY0-NEXT: [[TMP1385:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1385]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2980]] +// SIMD-ONLY0: if.end2980: +// SIMD-ONLY0-NEXT: [[TMP1386:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1387:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2981:%.*]] = icmp ugt i32 [[TMP1386]], [[TMP1387]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2981]], label [[COND_TRUE2983:%.*]], label [[COND_FALSE2984:%.*]] +// SIMD-ONLY0: cond.true2983: +// SIMD-ONLY0-NEXT: [[TMP1388:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2985:%.*]] +// SIMD-ONLY0: cond.false2984: +// SIMD-ONLY0-NEXT: [[TMP1389:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2985]] +// SIMD-ONLY0: cond.end2985: +// SIMD-ONLY0-NEXT: [[COND2986:%.*]] = phi i32 [ [[TMP1388]], [[COND_TRUE2983]] ], [ [[TMP1389]], [[COND_FALSE2984]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2986]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1390:%.*]] = load i32, ptr [[UIX]], 
align 4 +// SIMD-ONLY0-NEXT: [[TMP1391:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2987:%.*]] = icmp ult i32 [[TMP1390]], [[TMP1391]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2987]], label [[COND_TRUE2989:%.*]], label [[COND_FALSE2990:%.*]] +// SIMD-ONLY0: cond.true2989: +// SIMD-ONLY0-NEXT: [[TMP1392:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2991:%.*]] +// SIMD-ONLY0: cond.false2990: +// SIMD-ONLY0-NEXT: [[TMP1393:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2991]] +// SIMD-ONLY0: cond.end2991: +// SIMD-ONLY0-NEXT: [[COND2992:%.*]] = phi i32 [ [[TMP1392]], [[COND_TRUE2989]] ], [ [[TMP1393]], [[COND_FALSE2990]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2992]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1394:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1395:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2993:%.*]] = icmp ugt i32 [[TMP1394]], [[TMP1395]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2993]], label [[COND_TRUE2995:%.*]], label [[COND_FALSE2996:%.*]] +// SIMD-ONLY0: cond.true2995: +// SIMD-ONLY0-NEXT: [[TMP1396:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2997:%.*]] +// SIMD-ONLY0: cond.false2996: +// SIMD-ONLY0-NEXT: [[TMP1397:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END2997]] +// SIMD-ONLY0: cond.end2997: +// SIMD-ONLY0-NEXT: [[COND2998:%.*]] = phi i32 [ [[TMP1396]], [[COND_TRUE2995]] ], [ [[TMP1397]], [[COND_FALSE2996]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND2998]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1398:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1399:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2999:%.*]] = icmp ult i32 [[TMP1398]], [[TMP1399]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2999]], label [[COND_TRUE3001:%.*]], label [[COND_FALSE3002:%.*]] +// SIMD-ONLY0: cond.true3001: +// SIMD-ONLY0-NEXT: [[TMP1400:%.*]] = load i32, 
ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3003:%.*]] +// SIMD-ONLY0: cond.false3002: +// SIMD-ONLY0-NEXT: [[TMP1401:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3003]] +// SIMD-ONLY0: cond.end3003: +// SIMD-ONLY0-NEXT: [[COND3004:%.*]] = phi i32 [ [[TMP1400]], [[COND_TRUE3001]] ], [ [[TMP1401]], [[COND_FALSE3002]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3004]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1402:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1403:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3005:%.*]] = icmp ugt i32 [[TMP1402]], [[TMP1403]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3005]], label [[IF_THEN3007:%.*]], label [[IF_END3008:%.*]] +// SIMD-ONLY0: if.then3007: +// SIMD-ONLY0-NEXT: [[TMP1404:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1404]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3008]] +// SIMD-ONLY0: if.end3008: +// SIMD-ONLY0-NEXT: [[TMP1405:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1406:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3009:%.*]] = icmp ult i32 [[TMP1405]], [[TMP1406]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3009]], label [[IF_THEN3011:%.*]], label [[IF_END3012:%.*]] +// SIMD-ONLY0: if.then3011: +// SIMD-ONLY0-NEXT: [[TMP1407:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1407]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3012]] +// SIMD-ONLY0: if.end3012: +// SIMD-ONLY0-NEXT: [[TMP1408:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1409:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3013:%.*]] = icmp ugt i32 [[TMP1408]], [[TMP1409]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3013]], label [[IF_THEN3015:%.*]], label [[IF_END3016:%.*]] +// SIMD-ONLY0: if.then3015: +// SIMD-ONLY0-NEXT: [[TMP1410:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1410]], ptr [[UIX]], align 
4 +// SIMD-ONLY0-NEXT: br label [[IF_END3016]] +// SIMD-ONLY0: if.end3016: +// SIMD-ONLY0-NEXT: [[TMP1411:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1412:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3017:%.*]] = icmp ult i32 [[TMP1411]], [[TMP1412]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3017]], label [[IF_THEN3019:%.*]], label [[IF_END3020:%.*]] +// SIMD-ONLY0: if.then3019: +// SIMD-ONLY0-NEXT: [[TMP1413:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1413]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3020]] +// SIMD-ONLY0: if.end3020: +// SIMD-ONLY0-NEXT: [[TMP1414:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1415:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3021:%.*]] = icmp eq i32 [[TMP1414]], [[TMP1415]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3021]], label [[COND_TRUE3023:%.*]], label [[COND_FALSE3024:%.*]] +// SIMD-ONLY0: cond.true3023: +// SIMD-ONLY0-NEXT: [[TMP1416:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3025:%.*]] +// SIMD-ONLY0: cond.false3024: +// SIMD-ONLY0-NEXT: [[TMP1417:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3025]] +// SIMD-ONLY0: cond.end3025: +// SIMD-ONLY0-NEXT: [[COND3026:%.*]] = phi i32 [ [[TMP1416]], [[COND_TRUE3023]] ], [ [[TMP1417]], [[COND_FALSE3024]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3026]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1418:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1419:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3027:%.*]] = icmp eq i32 [[TMP1418]], [[TMP1419]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3027]], label [[COND_TRUE3029:%.*]], label [[COND_FALSE3030:%.*]] +// SIMD-ONLY0: cond.true3029: +// SIMD-ONLY0-NEXT: [[TMP1420:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3031:%.*]] +// SIMD-ONLY0: cond.false3030: +// SIMD-ONLY0-NEXT: [[TMP1421:%.*]] = load i32, 
ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3031]] +// SIMD-ONLY0: cond.end3031: +// SIMD-ONLY0-NEXT: [[COND3032:%.*]] = phi i32 [ [[TMP1420]], [[COND_TRUE3029]] ], [ [[TMP1421]], [[COND_FALSE3030]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3032]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1422:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1423:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3033:%.*]] = icmp eq i32 [[TMP1422]], [[TMP1423]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3033]], label [[IF_THEN3035:%.*]], label [[IF_END3036:%.*]] +// SIMD-ONLY0: if.then3035: +// SIMD-ONLY0-NEXT: [[TMP1424:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1424]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3036]] +// SIMD-ONLY0: if.end3036: +// SIMD-ONLY0-NEXT: [[TMP1425:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1426:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3037:%.*]] = icmp eq i32 [[TMP1425]], [[TMP1426]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3037]], label [[IF_THEN3039:%.*]], label [[IF_END3040:%.*]] +// SIMD-ONLY0: if.then3039: +// SIMD-ONLY0-NEXT: [[TMP1427:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1427]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3040]] +// SIMD-ONLY0: if.end3040: +// SIMD-ONLY0-NEXT: [[TMP1428:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1429:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3041:%.*]] = icmp sgt i32 [[TMP1428]], [[TMP1429]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3041]], label [[COND_TRUE3043:%.*]], label [[COND_FALSE3044:%.*]] +// SIMD-ONLY0: cond.true3043: +// SIMD-ONLY0-NEXT: [[TMP1430:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3045:%.*]] +// SIMD-ONLY0: cond.false3044: +// SIMD-ONLY0-NEXT: [[TMP1431:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3045]] +// 
SIMD-ONLY0: cond.end3045: +// SIMD-ONLY0-NEXT: [[COND3046:%.*]] = phi i32 [ [[TMP1430]], [[COND_TRUE3043]] ], [ [[TMP1431]], [[COND_FALSE3044]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3046]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1432:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1433:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3047:%.*]] = icmp slt i32 [[TMP1432]], [[TMP1433]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3047]], label [[COND_TRUE3049:%.*]], label [[COND_FALSE3050:%.*]] +// SIMD-ONLY0: cond.true3049: +// SIMD-ONLY0-NEXT: [[TMP1434:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3051:%.*]] +// SIMD-ONLY0: cond.false3050: +// SIMD-ONLY0-NEXT: [[TMP1435:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3051]] +// SIMD-ONLY0: cond.end3051: +// SIMD-ONLY0-NEXT: [[COND3052:%.*]] = phi i32 [ [[TMP1434]], [[COND_TRUE3049]] ], [ [[TMP1435]], [[COND_FALSE3050]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3052]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1436:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1437:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3053:%.*]] = icmp sgt i32 [[TMP1436]], [[TMP1437]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3053]], label [[COND_TRUE3055:%.*]], label [[COND_FALSE3056:%.*]] +// SIMD-ONLY0: cond.true3055: +// SIMD-ONLY0-NEXT: [[TMP1438:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3057:%.*]] +// SIMD-ONLY0: cond.false3056: +// SIMD-ONLY0-NEXT: [[TMP1439:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3057]] +// SIMD-ONLY0: cond.end3057: +// SIMD-ONLY0-NEXT: [[COND3058:%.*]] = phi i32 [ [[TMP1438]], [[COND_TRUE3055]] ], [ [[TMP1439]], [[COND_FALSE3056]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3058]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1440:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1441:%.*]] = load i32, ptr [[IX]], 
align 4 +// SIMD-ONLY0-NEXT: [[CMP3059:%.*]] = icmp slt i32 [[TMP1440]], [[TMP1441]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3059]], label [[COND_TRUE3061:%.*]], label [[COND_FALSE3062:%.*]] +// SIMD-ONLY0: cond.true3061: +// SIMD-ONLY0-NEXT: [[TMP1442:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3063:%.*]] +// SIMD-ONLY0: cond.false3062: +// SIMD-ONLY0-NEXT: [[TMP1443:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3063]] +// SIMD-ONLY0: cond.end3063: +// SIMD-ONLY0-NEXT: [[COND3064:%.*]] = phi i32 [ [[TMP1442]], [[COND_TRUE3061]] ], [ [[TMP1443]], [[COND_FALSE3062]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3064]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1444:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1445:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3065:%.*]] = icmp sgt i32 [[TMP1444]], [[TMP1445]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3065]], label [[IF_THEN3067:%.*]], label [[IF_END3068:%.*]] +// SIMD-ONLY0: if.then3067: +// SIMD-ONLY0-NEXT: [[TMP1446:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1446]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3068]] +// SIMD-ONLY0: if.end3068: +// SIMD-ONLY0-NEXT: [[TMP1447:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1448:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3069:%.*]] = icmp slt i32 [[TMP1447]], [[TMP1448]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3069]], label [[IF_THEN3071:%.*]], label [[IF_END3072:%.*]] +// SIMD-ONLY0: if.then3071: +// SIMD-ONLY0-NEXT: [[TMP1449:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1449]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3072]] +// SIMD-ONLY0: if.end3072: +// SIMD-ONLY0-NEXT: [[TMP1450:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1451:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3073:%.*]] = icmp sgt i32 [[TMP1450]], [[TMP1451]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP3073]], label [[IF_THEN3075:%.*]], label [[IF_END3076:%.*]] +// SIMD-ONLY0: if.then3075: +// SIMD-ONLY0-NEXT: [[TMP1452:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1452]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3076]] +// SIMD-ONLY0: if.end3076: +// SIMD-ONLY0-NEXT: [[TMP1453:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1454:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3077:%.*]] = icmp slt i32 [[TMP1453]], [[TMP1454]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3077]], label [[IF_THEN3079:%.*]], label [[IF_END3080:%.*]] +// SIMD-ONLY0: if.then3079: +// SIMD-ONLY0-NEXT: [[TMP1455:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1455]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3080]] +// SIMD-ONLY0: if.end3080: +// SIMD-ONLY0-NEXT: [[TMP1456:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1457:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3081:%.*]] = icmp eq i32 [[TMP1456]], [[TMP1457]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3081]], label [[COND_TRUE3083:%.*]], label [[COND_FALSE3084:%.*]] +// SIMD-ONLY0: cond.true3083: +// SIMD-ONLY0-NEXT: [[TMP1458:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3085:%.*]] +// SIMD-ONLY0: cond.false3084: +// SIMD-ONLY0-NEXT: [[TMP1459:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3085]] +// SIMD-ONLY0: cond.end3085: +// SIMD-ONLY0-NEXT: [[COND3086:%.*]] = phi i32 [ [[TMP1458]], [[COND_TRUE3083]] ], [ [[TMP1459]], [[COND_FALSE3084]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3086]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1460:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1461:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3087:%.*]] = icmp eq i32 [[TMP1460]], [[TMP1461]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3087]], label [[COND_TRUE3089:%.*]], label 
[[COND_FALSE3090:%.*]] +// SIMD-ONLY0: cond.true3089: +// SIMD-ONLY0-NEXT: [[TMP1462:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3091:%.*]] +// SIMD-ONLY0: cond.false3090: +// SIMD-ONLY0-NEXT: [[TMP1463:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3091]] +// SIMD-ONLY0: cond.end3091: +// SIMD-ONLY0-NEXT: [[COND3092:%.*]] = phi i32 [ [[TMP1462]], [[COND_TRUE3089]] ], [ [[TMP1463]], [[COND_FALSE3090]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3092]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1464:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1465:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3093:%.*]] = icmp eq i32 [[TMP1464]], [[TMP1465]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3093]], label [[IF_THEN3095:%.*]], label [[IF_END3096:%.*]] +// SIMD-ONLY0: if.then3095: +// SIMD-ONLY0-NEXT: [[TMP1466:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1466]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3096]] +// SIMD-ONLY0: if.end3096: +// SIMD-ONLY0-NEXT: [[TMP1467:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1468:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3097:%.*]] = icmp eq i32 [[TMP1467]], [[TMP1468]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3097]], label [[IF_THEN3099:%.*]], label [[IF_END3100:%.*]] +// SIMD-ONLY0: if.then3099: +// SIMD-ONLY0-NEXT: [[TMP1469:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1469]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3100]] +// SIMD-ONLY0: if.end3100: +// SIMD-ONLY0-NEXT: [[TMP1470:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1471:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3101:%.*]] = icmp ugt i32 [[TMP1470]], [[TMP1471]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3101]], label [[COND_TRUE3103:%.*]], label [[COND_FALSE3104:%.*]] +// SIMD-ONLY0: cond.true3103: +// SIMD-ONLY0-NEXT: 
[[TMP1472:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3105:%.*]] +// SIMD-ONLY0: cond.false3104: +// SIMD-ONLY0-NEXT: [[TMP1473:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3105]] +// SIMD-ONLY0: cond.end3105: +// SIMD-ONLY0-NEXT: [[COND3106:%.*]] = phi i32 [ [[TMP1472]], [[COND_TRUE3103]] ], [ [[TMP1473]], [[COND_FALSE3104]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3106]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1474:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1475:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3107:%.*]] = icmp ult i32 [[TMP1474]], [[TMP1475]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3107]], label [[COND_TRUE3109:%.*]], label [[COND_FALSE3110:%.*]] +// SIMD-ONLY0: cond.true3109: +// SIMD-ONLY0-NEXT: [[TMP1476:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3111:%.*]] +// SIMD-ONLY0: cond.false3110: +// SIMD-ONLY0-NEXT: [[TMP1477:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3111]] +// SIMD-ONLY0: cond.end3111: +// SIMD-ONLY0-NEXT: [[COND3112:%.*]] = phi i32 [ [[TMP1476]], [[COND_TRUE3109]] ], [ [[TMP1477]], [[COND_FALSE3110]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3112]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1478:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1479:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3113:%.*]] = icmp ugt i32 [[TMP1478]], [[TMP1479]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3113]], label [[COND_TRUE3115:%.*]], label [[COND_FALSE3116:%.*]] +// SIMD-ONLY0: cond.true3115: +// SIMD-ONLY0-NEXT: [[TMP1480:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3117:%.*]] +// SIMD-ONLY0: cond.false3116: +// SIMD-ONLY0-NEXT: [[TMP1481:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3117]] +// SIMD-ONLY0: cond.end3117: +// SIMD-ONLY0-NEXT: [[COND3118:%.*]] = phi i32 [ 
[[TMP1480]], [[COND_TRUE3115]] ], [ [[TMP1481]], [[COND_FALSE3116]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3118]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1482:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1483:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3119:%.*]] = icmp ult i32 [[TMP1482]], [[TMP1483]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3119]], label [[COND_TRUE3121:%.*]], label [[COND_FALSE3122:%.*]] +// SIMD-ONLY0: cond.true3121: +// SIMD-ONLY0-NEXT: [[TMP1484:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3123:%.*]] +// SIMD-ONLY0: cond.false3122: +// SIMD-ONLY0-NEXT: [[TMP1485:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3123]] +// SIMD-ONLY0: cond.end3123: +// SIMD-ONLY0-NEXT: [[COND3124:%.*]] = phi i32 [ [[TMP1484]], [[COND_TRUE3121]] ], [ [[TMP1485]], [[COND_FALSE3122]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3124]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1486:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1487:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3125:%.*]] = icmp ugt i32 [[TMP1486]], [[TMP1487]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3125]], label [[IF_THEN3127:%.*]], label [[IF_END3128:%.*]] +// SIMD-ONLY0: if.then3127: +// SIMD-ONLY0-NEXT: [[TMP1488:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1488]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3128]] +// SIMD-ONLY0: if.end3128: +// SIMD-ONLY0-NEXT: [[TMP1489:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1490:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3129:%.*]] = icmp ult i32 [[TMP1489]], [[TMP1490]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3129]], label [[IF_THEN3131:%.*]], label [[IF_END3132:%.*]] +// SIMD-ONLY0: if.then3131: +// SIMD-ONLY0-NEXT: [[TMP1491:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1491]], ptr [[UIX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END3132]] +// SIMD-ONLY0: if.end3132: +// SIMD-ONLY0-NEXT: [[TMP1492:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1493:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3133:%.*]] = icmp ugt i32 [[TMP1492]], [[TMP1493]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3133]], label [[IF_THEN3135:%.*]], label [[IF_END3136:%.*]] +// SIMD-ONLY0: if.then3135: +// SIMD-ONLY0-NEXT: [[TMP1494:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1494]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3136]] +// SIMD-ONLY0: if.end3136: +// SIMD-ONLY0-NEXT: [[TMP1495:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1496:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3137:%.*]] = icmp ult i32 [[TMP1495]], [[TMP1496]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3137]], label [[IF_THEN3139:%.*]], label [[IF_END3140:%.*]] +// SIMD-ONLY0: if.then3139: +// SIMD-ONLY0-NEXT: [[TMP1497:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1497]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3140]] +// SIMD-ONLY0: if.end3140: +// SIMD-ONLY0-NEXT: [[TMP1498:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1499:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3141:%.*]] = icmp eq i32 [[TMP1498]], [[TMP1499]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3141]], label [[COND_TRUE3143:%.*]], label [[COND_FALSE3144:%.*]] +// SIMD-ONLY0: cond.true3143: +// SIMD-ONLY0-NEXT: [[TMP1500:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3145:%.*]] +// SIMD-ONLY0: cond.false3144: +// SIMD-ONLY0-NEXT: [[TMP1501:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3145]] +// SIMD-ONLY0: cond.end3145: +// SIMD-ONLY0-NEXT: [[COND3146:%.*]] = phi i32 [ [[TMP1500]], [[COND_TRUE3143]] ], [ [[TMP1501]], [[COND_FALSE3144]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3146]], ptr [[UIX]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP1502:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1503:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3147:%.*]] = icmp eq i32 [[TMP1502]], [[TMP1503]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3147]], label [[COND_TRUE3149:%.*]], label [[COND_FALSE3150:%.*]] +// SIMD-ONLY0: cond.true3149: +// SIMD-ONLY0-NEXT: [[TMP1504:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3151:%.*]] +// SIMD-ONLY0: cond.false3150: +// SIMD-ONLY0-NEXT: [[TMP1505:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END3151]] +// SIMD-ONLY0: cond.end3151: +// SIMD-ONLY0-NEXT: [[COND3152:%.*]] = phi i32 [ [[TMP1504]], [[COND_TRUE3149]] ], [ [[TMP1505]], [[COND_FALSE3150]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND3152]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1506:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1507:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3153:%.*]] = icmp eq i32 [[TMP1506]], [[TMP1507]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3153]], label [[IF_THEN3155:%.*]], label [[IF_END3156:%.*]] +// SIMD-ONLY0: if.then3155: +// SIMD-ONLY0-NEXT: [[TMP1508:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1508]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3156]] +// SIMD-ONLY0: if.end3156: +// SIMD-ONLY0-NEXT: [[TMP1509:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1510:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3157:%.*]] = icmp eq i32 [[TMP1509]], [[TMP1510]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3157]], label [[IF_THEN3159:%.*]], label [[IF_END3160:%.*]] +// SIMD-ONLY0: if.then3159: +// SIMD-ONLY0-NEXT: [[TMP1511:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1511]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3160]] +// SIMD-ONLY0: if.end3160: +// SIMD-ONLY0-NEXT: [[TMP1512:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP1513:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3161:%.*]] = icmp sgt i64 [[TMP1512]], [[TMP1513]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3161]], label [[COND_TRUE3163:%.*]], label [[COND_FALSE3164:%.*]] +// SIMD-ONLY0: cond.true3163: +// SIMD-ONLY0-NEXT: [[TMP1514:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3165:%.*]] +// SIMD-ONLY0: cond.false3164: +// SIMD-ONLY0-NEXT: [[TMP1515:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3165]] +// SIMD-ONLY0: cond.end3165: +// SIMD-ONLY0-NEXT: [[COND3166:%.*]] = phi i64 [ [[TMP1514]], [[COND_TRUE3163]] ], [ [[TMP1515]], [[COND_FALSE3164]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3166]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1516:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1517:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3167:%.*]] = icmp slt i64 [[TMP1516]], [[TMP1517]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3167]], label [[COND_TRUE3169:%.*]], label [[COND_FALSE3170:%.*]] +// SIMD-ONLY0: cond.true3169: +// SIMD-ONLY0-NEXT: [[TMP1518:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3171:%.*]] +// SIMD-ONLY0: cond.false3170: +// SIMD-ONLY0-NEXT: [[TMP1519:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3171]] +// SIMD-ONLY0: cond.end3171: +// SIMD-ONLY0-NEXT: [[COND3172:%.*]] = phi i64 [ [[TMP1518]], [[COND_TRUE3169]] ], [ [[TMP1519]], [[COND_FALSE3170]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3172]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1520:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1521:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3173:%.*]] = icmp sgt i64 [[TMP1520]], [[TMP1521]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3173]], label [[COND_TRUE3175:%.*]], label [[COND_FALSE3176:%.*]] +// SIMD-ONLY0: cond.true3175: +// SIMD-ONLY0-NEXT: [[TMP1522:%.*]] = load i64, ptr [[LE]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[COND_END3177:%.*]] +// SIMD-ONLY0: cond.false3176: +// SIMD-ONLY0-NEXT: [[TMP1523:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3177]] +// SIMD-ONLY0: cond.end3177: +// SIMD-ONLY0-NEXT: [[COND3178:%.*]] = phi i64 [ [[TMP1522]], [[COND_TRUE3175]] ], [ [[TMP1523]], [[COND_FALSE3176]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3178]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1524:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1525:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3179:%.*]] = icmp slt i64 [[TMP1524]], [[TMP1525]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3179]], label [[COND_TRUE3181:%.*]], label [[COND_FALSE3182:%.*]] +// SIMD-ONLY0: cond.true3181: +// SIMD-ONLY0-NEXT: [[TMP1526:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3183:%.*]] +// SIMD-ONLY0: cond.false3182: +// SIMD-ONLY0-NEXT: [[TMP1527:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3183]] +// SIMD-ONLY0: cond.end3183: +// SIMD-ONLY0-NEXT: [[COND3184:%.*]] = phi i64 [ [[TMP1526]], [[COND_TRUE3181]] ], [ [[TMP1527]], [[COND_FALSE3182]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3184]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1528:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1529:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3185:%.*]] = icmp sgt i64 [[TMP1528]], [[TMP1529]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3185]], label [[IF_THEN3187:%.*]], label [[IF_END3188:%.*]] +// SIMD-ONLY0: if.then3187: +// SIMD-ONLY0-NEXT: [[TMP1530:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1530]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3188]] +// SIMD-ONLY0: if.end3188: +// SIMD-ONLY0-NEXT: [[TMP1531:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1532:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3189:%.*]] = icmp slt i64 [[TMP1531]], [[TMP1532]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP3189]], label [[IF_THEN3191:%.*]], label [[IF_END3192:%.*]] +// SIMD-ONLY0: if.then3191: +// SIMD-ONLY0-NEXT: [[TMP1533:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1533]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3192]] +// SIMD-ONLY0: if.end3192: +// SIMD-ONLY0-NEXT: [[TMP1534:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1535:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3193:%.*]] = icmp sgt i64 [[TMP1534]], [[TMP1535]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3193]], label [[IF_THEN3195:%.*]], label [[IF_END3196:%.*]] +// SIMD-ONLY0: if.then3195: +// SIMD-ONLY0-NEXT: [[TMP1536:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1536]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3196]] +// SIMD-ONLY0: if.end3196: +// SIMD-ONLY0-NEXT: [[TMP1537:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1538:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3197:%.*]] = icmp slt i64 [[TMP1537]], [[TMP1538]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3197]], label [[IF_THEN3199:%.*]], label [[IF_END3200:%.*]] +// SIMD-ONLY0: if.then3199: +// SIMD-ONLY0-NEXT: [[TMP1539:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1539]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3200]] +// SIMD-ONLY0: if.end3200: +// SIMD-ONLY0-NEXT: [[TMP1540:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1541:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3201:%.*]] = icmp eq i64 [[TMP1540]], [[TMP1541]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3201]], label [[COND_TRUE3203:%.*]], label [[COND_FALSE3204:%.*]] +// SIMD-ONLY0: cond.true3203: +// SIMD-ONLY0-NEXT: [[TMP1542:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3205:%.*]] +// SIMD-ONLY0: cond.false3204: +// SIMD-ONLY0-NEXT: [[TMP1543:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br 
label [[COND_END3205]] +// SIMD-ONLY0: cond.end3205: +// SIMD-ONLY0-NEXT: [[COND3206:%.*]] = phi i64 [ [[TMP1542]], [[COND_TRUE3203]] ], [ [[TMP1543]], [[COND_FALSE3204]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3206]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1544:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1545:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3207:%.*]] = icmp eq i64 [[TMP1544]], [[TMP1545]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3207]], label [[COND_TRUE3209:%.*]], label [[COND_FALSE3210:%.*]] +// SIMD-ONLY0: cond.true3209: +// SIMD-ONLY0-NEXT: [[TMP1546:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3211:%.*]] +// SIMD-ONLY0: cond.false3210: +// SIMD-ONLY0-NEXT: [[TMP1547:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3211]] +// SIMD-ONLY0: cond.end3211: +// SIMD-ONLY0-NEXT: [[COND3212:%.*]] = phi i64 [ [[TMP1546]], [[COND_TRUE3209]] ], [ [[TMP1547]], [[COND_FALSE3210]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3212]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1548:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1549:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3213:%.*]] = icmp eq i64 [[TMP1548]], [[TMP1549]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3213]], label [[IF_THEN3215:%.*]], label [[IF_END3216:%.*]] +// SIMD-ONLY0: if.then3215: +// SIMD-ONLY0-NEXT: [[TMP1550:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1550]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3216]] +// SIMD-ONLY0: if.end3216: +// SIMD-ONLY0-NEXT: [[TMP1551:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1552:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3217:%.*]] = icmp eq i64 [[TMP1551]], [[TMP1552]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3217]], label [[IF_THEN3219:%.*]], label [[IF_END3220:%.*]] +// SIMD-ONLY0: if.then3219: +// SIMD-ONLY0-NEXT: [[TMP1553:%.*]] = load i64, ptr 
[[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1553]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3220]] +// SIMD-ONLY0: if.end3220: +// SIMD-ONLY0-NEXT: [[TMP1554:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1555:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3221:%.*]] = icmp ugt i64 [[TMP1554]], [[TMP1555]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3221]], label [[COND_TRUE3223:%.*]], label [[COND_FALSE3224:%.*]] +// SIMD-ONLY0: cond.true3223: +// SIMD-ONLY0-NEXT: [[TMP1556:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3225:%.*]] +// SIMD-ONLY0: cond.false3224: +// SIMD-ONLY0-NEXT: [[TMP1557:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3225]] +// SIMD-ONLY0: cond.end3225: +// SIMD-ONLY0-NEXT: [[COND3226:%.*]] = phi i64 [ [[TMP1556]], [[COND_TRUE3223]] ], [ [[TMP1557]], [[COND_FALSE3224]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3226]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1558:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1559:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3227:%.*]] = icmp ult i64 [[TMP1558]], [[TMP1559]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3227]], label [[COND_TRUE3229:%.*]], label [[COND_FALSE3230:%.*]] +// SIMD-ONLY0: cond.true3229: +// SIMD-ONLY0-NEXT: [[TMP1560:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3231:%.*]] +// SIMD-ONLY0: cond.false3230: +// SIMD-ONLY0-NEXT: [[TMP1561:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3231]] +// SIMD-ONLY0: cond.end3231: +// SIMD-ONLY0-NEXT: [[COND3232:%.*]] = phi i64 [ [[TMP1560]], [[COND_TRUE3229]] ], [ [[TMP1561]], [[COND_FALSE3230]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3232]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1562:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1563:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3233:%.*]] = 
icmp ugt i64 [[TMP1562]], [[TMP1563]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3233]], label [[COND_TRUE3235:%.*]], label [[COND_FALSE3236:%.*]] +// SIMD-ONLY0: cond.true3235: +// SIMD-ONLY0-NEXT: [[TMP1564:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3237:%.*]] +// SIMD-ONLY0: cond.false3236: +// SIMD-ONLY0-NEXT: [[TMP1565:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3237]] +// SIMD-ONLY0: cond.end3237: +// SIMD-ONLY0-NEXT: [[COND3238:%.*]] = phi i64 [ [[TMP1564]], [[COND_TRUE3235]] ], [ [[TMP1565]], [[COND_FALSE3236]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3238]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1566:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1567:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3239:%.*]] = icmp ult i64 [[TMP1566]], [[TMP1567]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3239]], label [[COND_TRUE3241:%.*]], label [[COND_FALSE3242:%.*]] +// SIMD-ONLY0: cond.true3241: +// SIMD-ONLY0-NEXT: [[TMP1568:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3243:%.*]] +// SIMD-ONLY0: cond.false3242: +// SIMD-ONLY0-NEXT: [[TMP1569:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3243]] +// SIMD-ONLY0: cond.end3243: +// SIMD-ONLY0-NEXT: [[COND3244:%.*]] = phi i64 [ [[TMP1568]], [[COND_TRUE3241]] ], [ [[TMP1569]], [[COND_FALSE3242]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3244]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1570:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1571:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3245:%.*]] = icmp ugt i64 [[TMP1570]], [[TMP1571]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3245]], label [[IF_THEN3247:%.*]], label [[IF_END3248:%.*]] +// SIMD-ONLY0: if.then3247: +// SIMD-ONLY0-NEXT: [[TMP1572:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1572]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[IF_END3248]] +// SIMD-ONLY0: if.end3248: +// SIMD-ONLY0-NEXT: [[TMP1573:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1574:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3249:%.*]] = icmp ult i64 [[TMP1573]], [[TMP1574]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3249]], label [[IF_THEN3251:%.*]], label [[IF_END3252:%.*]] +// SIMD-ONLY0: if.then3251: +// SIMD-ONLY0-NEXT: [[TMP1575:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1575]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3252]] +// SIMD-ONLY0: if.end3252: +// SIMD-ONLY0-NEXT: [[TMP1576:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1577:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3253:%.*]] = icmp ugt i64 [[TMP1576]], [[TMP1577]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3253]], label [[IF_THEN3255:%.*]], label [[IF_END3256:%.*]] +// SIMD-ONLY0: if.then3255: +// SIMD-ONLY0-NEXT: [[TMP1578:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1578]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3256]] +// SIMD-ONLY0: if.end3256: +// SIMD-ONLY0-NEXT: [[TMP1579:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1580:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3257:%.*]] = icmp ult i64 [[TMP1579]], [[TMP1580]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3257]], label [[IF_THEN3259:%.*]], label [[IF_END3260:%.*]] +// SIMD-ONLY0: if.then3259: +// SIMD-ONLY0-NEXT: [[TMP1581:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1581]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3260]] +// SIMD-ONLY0: if.end3260: +// SIMD-ONLY0-NEXT: [[TMP1582:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1583:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3261:%.*]] = icmp eq i64 [[TMP1582]], [[TMP1583]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3261]], label [[COND_TRUE3263:%.*]], label [[COND_FALSE3264:%.*]] +// 
SIMD-ONLY0: cond.true3263: +// SIMD-ONLY0-NEXT: [[TMP1584:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3265:%.*]] +// SIMD-ONLY0: cond.false3264: +// SIMD-ONLY0-NEXT: [[TMP1585:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3265]] +// SIMD-ONLY0: cond.end3265: +// SIMD-ONLY0-NEXT: [[COND3266:%.*]] = phi i64 [ [[TMP1584]], [[COND_TRUE3263]] ], [ [[TMP1585]], [[COND_FALSE3264]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3266]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1586:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1587:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3267:%.*]] = icmp eq i64 [[TMP1586]], [[TMP1587]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3267]], label [[COND_TRUE3269:%.*]], label [[COND_FALSE3270:%.*]] +// SIMD-ONLY0: cond.true3269: +// SIMD-ONLY0-NEXT: [[TMP1588:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3271:%.*]] +// SIMD-ONLY0: cond.false3270: +// SIMD-ONLY0-NEXT: [[TMP1589:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3271]] +// SIMD-ONLY0: cond.end3271: +// SIMD-ONLY0-NEXT: [[COND3272:%.*]] = phi i64 [ [[TMP1588]], [[COND_TRUE3269]] ], [ [[TMP1589]], [[COND_FALSE3270]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3272]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1590:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1591:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3273:%.*]] = icmp eq i64 [[TMP1590]], [[TMP1591]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3273]], label [[IF_THEN3275:%.*]], label [[IF_END3276:%.*]] +// SIMD-ONLY0: if.then3275: +// SIMD-ONLY0-NEXT: [[TMP1592:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1592]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3276]] +// SIMD-ONLY0: if.end3276: +// SIMD-ONLY0-NEXT: [[TMP1593:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1594:%.*]] = 
load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3277:%.*]] = icmp eq i64 [[TMP1593]], [[TMP1594]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3277]], label [[IF_THEN3279:%.*]], label [[IF_END3280:%.*]] +// SIMD-ONLY0: if.then3279: +// SIMD-ONLY0-NEXT: [[TMP1595:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1595]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3280]] +// SIMD-ONLY0: if.end3280: +// SIMD-ONLY0-NEXT: [[TMP1596:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1597:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3281:%.*]] = icmp sgt i64 [[TMP1596]], [[TMP1597]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3281]], label [[COND_TRUE3283:%.*]], label [[COND_FALSE3284:%.*]] +// SIMD-ONLY0: cond.true3283: +// SIMD-ONLY0-NEXT: [[TMP1598:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3285:%.*]] +// SIMD-ONLY0: cond.false3284: +// SIMD-ONLY0-NEXT: [[TMP1599:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3285]] +// SIMD-ONLY0: cond.end3285: +// SIMD-ONLY0-NEXT: [[COND3286:%.*]] = phi i64 [ [[TMP1598]], [[COND_TRUE3283]] ], [ [[TMP1599]], [[COND_FALSE3284]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3286]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1600:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1601:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3287:%.*]] = icmp slt i64 [[TMP1600]], [[TMP1601]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3287]], label [[COND_TRUE3289:%.*]], label [[COND_FALSE3290:%.*]] +// SIMD-ONLY0: cond.true3289: +// SIMD-ONLY0-NEXT: [[TMP1602:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3291:%.*]] +// SIMD-ONLY0: cond.false3290: +// SIMD-ONLY0-NEXT: [[TMP1603:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3291]] +// SIMD-ONLY0: cond.end3291: +// SIMD-ONLY0-NEXT: [[COND3292:%.*]] = phi i64 [ [[TMP1602]], [[COND_TRUE3289]] ], [ 
[[TMP1603]], [[COND_FALSE3290]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3292]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1604:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1605:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3293:%.*]] = icmp sgt i64 [[TMP1604]], [[TMP1605]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3293]], label [[COND_TRUE3295:%.*]], label [[COND_FALSE3296:%.*]] +// SIMD-ONLY0: cond.true3295: +// SIMD-ONLY0-NEXT: [[TMP1606:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3297:%.*]] +// SIMD-ONLY0: cond.false3296: +// SIMD-ONLY0-NEXT: [[TMP1607:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3297]] +// SIMD-ONLY0: cond.end3297: +// SIMD-ONLY0-NEXT: [[COND3298:%.*]] = phi i64 [ [[TMP1606]], [[COND_TRUE3295]] ], [ [[TMP1607]], [[COND_FALSE3296]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3298]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1608:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1609:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3299:%.*]] = icmp slt i64 [[TMP1608]], [[TMP1609]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3299]], label [[COND_TRUE3301:%.*]], label [[COND_FALSE3302:%.*]] +// SIMD-ONLY0: cond.true3301: +// SIMD-ONLY0-NEXT: [[TMP1610:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3303:%.*]] +// SIMD-ONLY0: cond.false3302: +// SIMD-ONLY0-NEXT: [[TMP1611:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3303]] +// SIMD-ONLY0: cond.end3303: +// SIMD-ONLY0-NEXT: [[COND3304:%.*]] = phi i64 [ [[TMP1610]], [[COND_TRUE3301]] ], [ [[TMP1611]], [[COND_FALSE3302]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3304]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1612:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1613:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3305:%.*]] = icmp sgt i64 [[TMP1612]], [[TMP1613]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP3305]], label [[IF_THEN3307:%.*]], label [[IF_END3308:%.*]] +// SIMD-ONLY0: if.then3307: +// SIMD-ONLY0-NEXT: [[TMP1614:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1614]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3308]] +// SIMD-ONLY0: if.end3308: +// SIMD-ONLY0-NEXT: [[TMP1615:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1616:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3309:%.*]] = icmp slt i64 [[TMP1615]], [[TMP1616]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3309]], label [[IF_THEN3311:%.*]], label [[IF_END3312:%.*]] +// SIMD-ONLY0: if.then3311: +// SIMD-ONLY0-NEXT: [[TMP1617:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1617]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3312]] +// SIMD-ONLY0: if.end3312: +// SIMD-ONLY0-NEXT: [[TMP1618:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1619:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3313:%.*]] = icmp sgt i64 [[TMP1618]], [[TMP1619]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3313]], label [[IF_THEN3315:%.*]], label [[IF_END3316:%.*]] +// SIMD-ONLY0: if.then3315: +// SIMD-ONLY0-NEXT: [[TMP1620:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1620]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3316]] +// SIMD-ONLY0: if.end3316: +// SIMD-ONLY0-NEXT: [[TMP1621:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1622:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3317:%.*]] = icmp slt i64 [[TMP1621]], [[TMP1622]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3317]], label [[IF_THEN3319:%.*]], label [[IF_END3320:%.*]] +// SIMD-ONLY0: if.then3319: +// SIMD-ONLY0-NEXT: [[TMP1623:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1623]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3320]] +// SIMD-ONLY0: if.end3320: +// SIMD-ONLY0-NEXT: [[TMP1624:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP1625:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3321:%.*]] = icmp eq i64 [[TMP1624]], [[TMP1625]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3321]], label [[COND_TRUE3323:%.*]], label [[COND_FALSE3324:%.*]] +// SIMD-ONLY0: cond.true3323: +// SIMD-ONLY0-NEXT: [[TMP1626:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3325:%.*]] +// SIMD-ONLY0: cond.false3324: +// SIMD-ONLY0-NEXT: [[TMP1627:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3325]] +// SIMD-ONLY0: cond.end3325: +// SIMD-ONLY0-NEXT: [[COND3326:%.*]] = phi i64 [ [[TMP1626]], [[COND_TRUE3323]] ], [ [[TMP1627]], [[COND_FALSE3324]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3326]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1628:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1629:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3327:%.*]] = icmp eq i64 [[TMP1628]], [[TMP1629]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3327]], label [[COND_TRUE3329:%.*]], label [[COND_FALSE3330:%.*]] +// SIMD-ONLY0: cond.true3329: +// SIMD-ONLY0-NEXT: [[TMP1630:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3331:%.*]] +// SIMD-ONLY0: cond.false3330: +// SIMD-ONLY0-NEXT: [[TMP1631:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3331]] +// SIMD-ONLY0: cond.end3331: +// SIMD-ONLY0-NEXT: [[COND3332:%.*]] = phi i64 [ [[TMP1630]], [[COND_TRUE3329]] ], [ [[TMP1631]], [[COND_FALSE3330]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3332]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1632:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1633:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3333:%.*]] = icmp eq i64 [[TMP1632]], [[TMP1633]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3333]], label [[IF_THEN3335:%.*]], label [[IF_END3336:%.*]] +// SIMD-ONLY0: if.then3335: +// SIMD-ONLY0-NEXT: [[TMP1634:%.*]] = load i64, ptr [[LD]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP1634]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3336]] +// SIMD-ONLY0: if.end3336: +// SIMD-ONLY0-NEXT: [[TMP1635:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1636:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3337:%.*]] = icmp eq i64 [[TMP1635]], [[TMP1636]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3337]], label [[IF_THEN3339:%.*]], label [[IF_END3340:%.*]] +// SIMD-ONLY0: if.then3339: +// SIMD-ONLY0-NEXT: [[TMP1637:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1637]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3340]] +// SIMD-ONLY0: if.end3340: +// SIMD-ONLY0-NEXT: [[TMP1638:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1639:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3341:%.*]] = icmp ugt i64 [[TMP1638]], [[TMP1639]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3341]], label [[COND_TRUE3343:%.*]], label [[COND_FALSE3344:%.*]] +// SIMD-ONLY0: cond.true3343: +// SIMD-ONLY0-NEXT: [[TMP1640:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3345:%.*]] +// SIMD-ONLY0: cond.false3344: +// SIMD-ONLY0-NEXT: [[TMP1641:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3345]] +// SIMD-ONLY0: cond.end3345: +// SIMD-ONLY0-NEXT: [[COND3346:%.*]] = phi i64 [ [[TMP1640]], [[COND_TRUE3343]] ], [ [[TMP1641]], [[COND_FALSE3344]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3346]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1642:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1643:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3347:%.*]] = icmp ult i64 [[TMP1642]], [[TMP1643]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3347]], label [[COND_TRUE3349:%.*]], label [[COND_FALSE3350:%.*]] +// SIMD-ONLY0: cond.true3349: +// SIMD-ONLY0-NEXT: [[TMP1644:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3351:%.*]] +// SIMD-ONLY0: 
cond.false3350: +// SIMD-ONLY0-NEXT: [[TMP1645:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3351]] +// SIMD-ONLY0: cond.end3351: +// SIMD-ONLY0-NEXT: [[COND3352:%.*]] = phi i64 [ [[TMP1644]], [[COND_TRUE3349]] ], [ [[TMP1645]], [[COND_FALSE3350]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3352]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1646:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1647:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3353:%.*]] = icmp ugt i64 [[TMP1646]], [[TMP1647]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3353]], label [[COND_TRUE3355:%.*]], label [[COND_FALSE3356:%.*]] +// SIMD-ONLY0: cond.true3355: +// SIMD-ONLY0-NEXT: [[TMP1648:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3357:%.*]] +// SIMD-ONLY0: cond.false3356: +// SIMD-ONLY0-NEXT: [[TMP1649:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3357]] +// SIMD-ONLY0: cond.end3357: +// SIMD-ONLY0-NEXT: [[COND3358:%.*]] = phi i64 [ [[TMP1648]], [[COND_TRUE3355]] ], [ [[TMP1649]], [[COND_FALSE3356]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3358]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1650:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1651:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3359:%.*]] = icmp ult i64 [[TMP1650]], [[TMP1651]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3359]], label [[COND_TRUE3361:%.*]], label [[COND_FALSE3362:%.*]] +// SIMD-ONLY0: cond.true3361: +// SIMD-ONLY0-NEXT: [[TMP1652:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3363:%.*]] +// SIMD-ONLY0: cond.false3362: +// SIMD-ONLY0-NEXT: [[TMP1653:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3363]] +// SIMD-ONLY0: cond.end3363: +// SIMD-ONLY0-NEXT: [[COND3364:%.*]] = phi i64 [ [[TMP1652]], [[COND_TRUE3361]] ], [ [[TMP1653]], [[COND_FALSE3362]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3364]], ptr 
[[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1654:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1655:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3365:%.*]] = icmp ugt i64 [[TMP1654]], [[TMP1655]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3365]], label [[IF_THEN3367:%.*]], label [[IF_END3368:%.*]] +// SIMD-ONLY0: if.then3367: +// SIMD-ONLY0-NEXT: [[TMP1656:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1656]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3368]] +// SIMD-ONLY0: if.end3368: +// SIMD-ONLY0-NEXT: [[TMP1657:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1658:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3369:%.*]] = icmp ult i64 [[TMP1657]], [[TMP1658]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3369]], label [[IF_THEN3371:%.*]], label [[IF_END3372:%.*]] +// SIMD-ONLY0: if.then3371: +// SIMD-ONLY0-NEXT: [[TMP1659:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1659]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3372]] +// SIMD-ONLY0: if.end3372: +// SIMD-ONLY0-NEXT: [[TMP1660:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1661:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3373:%.*]] = icmp ugt i64 [[TMP1660]], [[TMP1661]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3373]], label [[IF_THEN3375:%.*]], label [[IF_END3376:%.*]] +// SIMD-ONLY0: if.then3375: +// SIMD-ONLY0-NEXT: [[TMP1662:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1662]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3376]] +// SIMD-ONLY0: if.end3376: +// SIMD-ONLY0-NEXT: [[TMP1663:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1664:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3377:%.*]] = icmp ult i64 [[TMP1663]], [[TMP1664]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3377]], label [[IF_THEN3379:%.*]], label [[IF_END3380:%.*]] +// SIMD-ONLY0: if.then3379: +// 
SIMD-ONLY0-NEXT: [[TMP1665:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1665]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3380]] +// SIMD-ONLY0: if.end3380: +// SIMD-ONLY0-NEXT: [[TMP1666:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1667:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3381:%.*]] = icmp eq i64 [[TMP1666]], [[TMP1667]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3381]], label [[COND_TRUE3383:%.*]], label [[COND_FALSE3384:%.*]] +// SIMD-ONLY0: cond.true3383: +// SIMD-ONLY0-NEXT: [[TMP1668:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3385:%.*]] +// SIMD-ONLY0: cond.false3384: +// SIMD-ONLY0-NEXT: [[TMP1669:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3385]] +// SIMD-ONLY0: cond.end3385: +// SIMD-ONLY0-NEXT: [[COND3386:%.*]] = phi i64 [ [[TMP1668]], [[COND_TRUE3383]] ], [ [[TMP1669]], [[COND_FALSE3384]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3386]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1670:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1671:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3387:%.*]] = icmp eq i64 [[TMP1670]], [[TMP1671]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3387]], label [[COND_TRUE3389:%.*]], label [[COND_FALSE3390:%.*]] +// SIMD-ONLY0: cond.true3389: +// SIMD-ONLY0-NEXT: [[TMP1672:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3391:%.*]] +// SIMD-ONLY0: cond.false3390: +// SIMD-ONLY0-NEXT: [[TMP1673:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3391]] +// SIMD-ONLY0: cond.end3391: +// SIMD-ONLY0-NEXT: [[COND3392:%.*]] = phi i64 [ [[TMP1672]], [[COND_TRUE3389]] ], [ [[TMP1673]], [[COND_FALSE3390]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3392]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1674:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1675:%.*]] = load i64, ptr [[ULE]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP3393:%.*]] = icmp eq i64 [[TMP1674]], [[TMP1675]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3393]], label [[IF_THEN3395:%.*]], label [[IF_END3396:%.*]] +// SIMD-ONLY0: if.then3395: +// SIMD-ONLY0-NEXT: [[TMP1676:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1676]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3396]] +// SIMD-ONLY0: if.end3396: +// SIMD-ONLY0-NEXT: [[TMP1677:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1678:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3397:%.*]] = icmp eq i64 [[TMP1677]], [[TMP1678]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3397]], label [[IF_THEN3399:%.*]], label [[IF_END3400:%.*]] +// SIMD-ONLY0: if.then3399: +// SIMD-ONLY0-NEXT: [[TMP1679:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1679]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3400]] +// SIMD-ONLY0: if.end3400: +// SIMD-ONLY0-NEXT: [[TMP1680:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1681:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3401:%.*]] = icmp sgt i64 [[TMP1680]], [[TMP1681]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3401]], label [[COND_TRUE3403:%.*]], label [[COND_FALSE3404:%.*]] +// SIMD-ONLY0: cond.true3403: +// SIMD-ONLY0-NEXT: [[TMP1682:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3405:%.*]] +// SIMD-ONLY0: cond.false3404: +// SIMD-ONLY0-NEXT: [[TMP1683:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3405]] +// SIMD-ONLY0: cond.end3405: +// SIMD-ONLY0-NEXT: [[COND3406:%.*]] = phi i64 [ [[TMP1682]], [[COND_TRUE3403]] ], [ [[TMP1683]], [[COND_FALSE3404]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3406]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1684:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1685:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3407:%.*]] = icmp slt i64 [[TMP1684]], [[TMP1685]] 
+// SIMD-ONLY0-NEXT: br i1 [[CMP3407]], label [[COND_TRUE3409:%.*]], label [[COND_FALSE3410:%.*]] +// SIMD-ONLY0: cond.true3409: +// SIMD-ONLY0-NEXT: [[TMP1686:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3411:%.*]] +// SIMD-ONLY0: cond.false3410: +// SIMD-ONLY0-NEXT: [[TMP1687:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3411]] +// SIMD-ONLY0: cond.end3411: +// SIMD-ONLY0-NEXT: [[COND3412:%.*]] = phi i64 [ [[TMP1686]], [[COND_TRUE3409]] ], [ [[TMP1687]], [[COND_FALSE3410]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3412]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1688:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1689:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3413:%.*]] = icmp sgt i64 [[TMP1688]], [[TMP1689]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3413]], label [[COND_TRUE3415:%.*]], label [[COND_FALSE3416:%.*]] +// SIMD-ONLY0: cond.true3415: +// SIMD-ONLY0-NEXT: [[TMP1690:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3417:%.*]] +// SIMD-ONLY0: cond.false3416: +// SIMD-ONLY0-NEXT: [[TMP1691:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3417]] +// SIMD-ONLY0: cond.end3417: +// SIMD-ONLY0-NEXT: [[COND3418:%.*]] = phi i64 [ [[TMP1690]], [[COND_TRUE3415]] ], [ [[TMP1691]], [[COND_FALSE3416]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3418]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1692:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1693:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3419:%.*]] = icmp slt i64 [[TMP1692]], [[TMP1693]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3419]], label [[COND_TRUE3421:%.*]], label [[COND_FALSE3422:%.*]] +// SIMD-ONLY0: cond.true3421: +// SIMD-ONLY0-NEXT: [[TMP1694:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3423:%.*]] +// SIMD-ONLY0: cond.false3422: +// SIMD-ONLY0-NEXT: [[TMP1695:%.*]] = load i64, ptr [[LX]], 
align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3423]] +// SIMD-ONLY0: cond.end3423: +// SIMD-ONLY0-NEXT: [[COND3424:%.*]] = phi i64 [ [[TMP1694]], [[COND_TRUE3421]] ], [ [[TMP1695]], [[COND_FALSE3422]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3424]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1696:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1697:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3425:%.*]] = icmp sgt i64 [[TMP1696]], [[TMP1697]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3425]], label [[IF_THEN3427:%.*]], label [[IF_END3428:%.*]] +// SIMD-ONLY0: if.then3427: +// SIMD-ONLY0-NEXT: [[TMP1698:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1698]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3428]] +// SIMD-ONLY0: if.end3428: +// SIMD-ONLY0-NEXT: [[TMP1699:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1700:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3429:%.*]] = icmp slt i64 [[TMP1699]], [[TMP1700]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3429]], label [[IF_THEN3431:%.*]], label [[IF_END3432:%.*]] +// SIMD-ONLY0: if.then3431: +// SIMD-ONLY0-NEXT: [[TMP1701:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1701]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3432]] +// SIMD-ONLY0: if.end3432: +// SIMD-ONLY0-NEXT: [[TMP1702:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1703:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3433:%.*]] = icmp sgt i64 [[TMP1702]], [[TMP1703]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3433]], label [[IF_THEN3435:%.*]], label [[IF_END3436:%.*]] +// SIMD-ONLY0: if.then3435: +// SIMD-ONLY0-NEXT: [[TMP1704:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1704]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3436]] +// SIMD-ONLY0: if.end3436: +// SIMD-ONLY0-NEXT: [[TMP1705:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP1706:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3437:%.*]] = icmp slt i64 [[TMP1705]], [[TMP1706]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3437]], label [[IF_THEN3439:%.*]], label [[IF_END3440:%.*]] +// SIMD-ONLY0: if.then3439: +// SIMD-ONLY0-NEXT: [[TMP1707:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1707]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3440]] +// SIMD-ONLY0: if.end3440: +// SIMD-ONLY0-NEXT: [[TMP1708:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1709:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3441:%.*]] = icmp eq i64 [[TMP1708]], [[TMP1709]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3441]], label [[COND_TRUE3443:%.*]], label [[COND_FALSE3444:%.*]] +// SIMD-ONLY0: cond.true3443: +// SIMD-ONLY0-NEXT: [[TMP1710:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3445:%.*]] +// SIMD-ONLY0: cond.false3444: +// SIMD-ONLY0-NEXT: [[TMP1711:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3445]] +// SIMD-ONLY0: cond.end3445: +// SIMD-ONLY0-NEXT: [[COND3446:%.*]] = phi i64 [ [[TMP1710]], [[COND_TRUE3443]] ], [ [[TMP1711]], [[COND_FALSE3444]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3446]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1712:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1713:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3447:%.*]] = icmp eq i64 [[TMP1712]], [[TMP1713]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3447]], label [[COND_TRUE3449:%.*]], label [[COND_FALSE3450:%.*]] +// SIMD-ONLY0: cond.true3449: +// SIMD-ONLY0-NEXT: [[TMP1714:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3451:%.*]] +// SIMD-ONLY0: cond.false3450: +// SIMD-ONLY0-NEXT: [[TMP1715:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3451]] +// SIMD-ONLY0: cond.end3451: +// SIMD-ONLY0-NEXT: [[COND3452:%.*]] = phi i64 [ [[TMP1714]], [[COND_TRUE3449]] 
], [ [[TMP1715]], [[COND_FALSE3450]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3452]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1716:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1717:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3453:%.*]] = icmp eq i64 [[TMP1716]], [[TMP1717]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3453]], label [[IF_THEN3455:%.*]], label [[IF_END3456:%.*]] +// SIMD-ONLY0: if.then3455: +// SIMD-ONLY0-NEXT: [[TMP1718:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1718]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3456]] +// SIMD-ONLY0: if.end3456: +// SIMD-ONLY0-NEXT: [[TMP1719:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1720:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3457:%.*]] = icmp eq i64 [[TMP1719]], [[TMP1720]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3457]], label [[IF_THEN3459:%.*]], label [[IF_END3460:%.*]] +// SIMD-ONLY0: if.then3459: +// SIMD-ONLY0-NEXT: [[TMP1721:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1721]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3460]] +// SIMD-ONLY0: if.end3460: +// SIMD-ONLY0-NEXT: [[TMP1722:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1723:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3461:%.*]] = icmp ugt i64 [[TMP1722]], [[TMP1723]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3461]], label [[COND_TRUE3463:%.*]], label [[COND_FALSE3464:%.*]] +// SIMD-ONLY0: cond.true3463: +// SIMD-ONLY0-NEXT: [[TMP1724:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3465:%.*]] +// SIMD-ONLY0: cond.false3464: +// SIMD-ONLY0-NEXT: [[TMP1725:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3465]] +// SIMD-ONLY0: cond.end3465: +// SIMD-ONLY0-NEXT: [[COND3466:%.*]] = phi i64 [ [[TMP1724]], [[COND_TRUE3463]] ], [ [[TMP1725]], [[COND_FALSE3464]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3466]], 
ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1726:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1727:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3467:%.*]] = icmp ult i64 [[TMP1726]], [[TMP1727]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3467]], label [[COND_TRUE3469:%.*]], label [[COND_FALSE3470:%.*]] +// SIMD-ONLY0: cond.true3469: +// SIMD-ONLY0-NEXT: [[TMP1728:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3471:%.*]] +// SIMD-ONLY0: cond.false3470: +// SIMD-ONLY0-NEXT: [[TMP1729:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3471]] +// SIMD-ONLY0: cond.end3471: +// SIMD-ONLY0-NEXT: [[COND3472:%.*]] = phi i64 [ [[TMP1728]], [[COND_TRUE3469]] ], [ [[TMP1729]], [[COND_FALSE3470]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3472]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1730:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1731:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3473:%.*]] = icmp ugt i64 [[TMP1730]], [[TMP1731]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3473]], label [[COND_TRUE3475:%.*]], label [[COND_FALSE3476:%.*]] +// SIMD-ONLY0: cond.true3475: +// SIMD-ONLY0-NEXT: [[TMP1732:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3477:%.*]] +// SIMD-ONLY0: cond.false3476: +// SIMD-ONLY0-NEXT: [[TMP1733:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3477]] +// SIMD-ONLY0: cond.end3477: +// SIMD-ONLY0-NEXT: [[COND3478:%.*]] = phi i64 [ [[TMP1732]], [[COND_TRUE3475]] ], [ [[TMP1733]], [[COND_FALSE3476]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3478]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1734:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1735:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3479:%.*]] = icmp ult i64 [[TMP1734]], [[TMP1735]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3479]], label [[COND_TRUE3481:%.*]], label [[COND_FALSE3482:%.*]] 
+// SIMD-ONLY0: cond.true3481: +// SIMD-ONLY0-NEXT: [[TMP1736:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3483:%.*]] +// SIMD-ONLY0: cond.false3482: +// SIMD-ONLY0-NEXT: [[TMP1737:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3483]] +// SIMD-ONLY0: cond.end3483: +// SIMD-ONLY0-NEXT: [[COND3484:%.*]] = phi i64 [ [[TMP1736]], [[COND_TRUE3481]] ], [ [[TMP1737]], [[COND_FALSE3482]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3484]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1738:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1739:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3485:%.*]] = icmp ugt i64 [[TMP1738]], [[TMP1739]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3485]], label [[IF_THEN3487:%.*]], label [[IF_END3488:%.*]] +// SIMD-ONLY0: if.then3487: +// SIMD-ONLY0-NEXT: [[TMP1740:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1740]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3488]] +// SIMD-ONLY0: if.end3488: +// SIMD-ONLY0-NEXT: [[TMP1741:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1742:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3489:%.*]] = icmp ult i64 [[TMP1741]], [[TMP1742]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3489]], label [[IF_THEN3491:%.*]], label [[IF_END3492:%.*]] +// SIMD-ONLY0: if.then3491: +// SIMD-ONLY0-NEXT: [[TMP1743:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1743]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3492]] +// SIMD-ONLY0: if.end3492: +// SIMD-ONLY0-NEXT: [[TMP1744:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1745:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3493:%.*]] = icmp ugt i64 [[TMP1744]], [[TMP1745]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3493]], label [[IF_THEN3495:%.*]], label [[IF_END3496:%.*]] +// SIMD-ONLY0: if.then3495: +// SIMD-ONLY0-NEXT: [[TMP1746:%.*]] = load i64, ptr 
[[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1746]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3496]] +// SIMD-ONLY0: if.end3496: +// SIMD-ONLY0-NEXT: [[TMP1747:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1748:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3497:%.*]] = icmp ult i64 [[TMP1747]], [[TMP1748]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3497]], label [[IF_THEN3499:%.*]], label [[IF_END3500:%.*]] +// SIMD-ONLY0: if.then3499: +// SIMD-ONLY0-NEXT: [[TMP1749:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1749]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3500]] +// SIMD-ONLY0: if.end3500: +// SIMD-ONLY0-NEXT: [[TMP1750:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1751:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3501:%.*]] = icmp eq i64 [[TMP1750]], [[TMP1751]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3501]], label [[COND_TRUE3503:%.*]], label [[COND_FALSE3504:%.*]] +// SIMD-ONLY0: cond.true3503: +// SIMD-ONLY0-NEXT: [[TMP1752:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3505:%.*]] +// SIMD-ONLY0: cond.false3504: +// SIMD-ONLY0-NEXT: [[TMP1753:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3505]] +// SIMD-ONLY0: cond.end3505: +// SIMD-ONLY0-NEXT: [[COND3506:%.*]] = phi i64 [ [[TMP1752]], [[COND_TRUE3503]] ], [ [[TMP1753]], [[COND_FALSE3504]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3506]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1754:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1755:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3507:%.*]] = icmp eq i64 [[TMP1754]], [[TMP1755]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3507]], label [[COND_TRUE3509:%.*]], label [[COND_FALSE3510:%.*]] +// SIMD-ONLY0: cond.true3509: +// SIMD-ONLY0-NEXT: [[TMP1756:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3511:%.*]] 
+// SIMD-ONLY0: cond.false3510: +// SIMD-ONLY0-NEXT: [[TMP1757:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3511]] +// SIMD-ONLY0: cond.end3511: +// SIMD-ONLY0-NEXT: [[COND3512:%.*]] = phi i64 [ [[TMP1756]], [[COND_TRUE3509]] ], [ [[TMP1757]], [[COND_FALSE3510]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3512]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1758:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1759:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3513:%.*]] = icmp eq i64 [[TMP1758]], [[TMP1759]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3513]], label [[IF_THEN3515:%.*]], label [[IF_END3516:%.*]] +// SIMD-ONLY0: if.then3515: +// SIMD-ONLY0-NEXT: [[TMP1760:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1760]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3516]] +// SIMD-ONLY0: if.end3516: +// SIMD-ONLY0-NEXT: [[TMP1761:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1762:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3517:%.*]] = icmp eq i64 [[TMP1761]], [[TMP1762]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3517]], label [[IF_THEN3519:%.*]], label [[IF_END3520:%.*]] +// SIMD-ONLY0: if.then3519: +// SIMD-ONLY0-NEXT: [[TMP1763:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1763]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3520]] +// SIMD-ONLY0: if.end3520: +// SIMD-ONLY0-NEXT: [[TMP1764:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1765:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3521:%.*]] = icmp sgt i64 [[TMP1764]], [[TMP1765]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3521]], label [[COND_TRUE3523:%.*]], label [[COND_FALSE3524:%.*]] +// SIMD-ONLY0: cond.true3523: +// SIMD-ONLY0-NEXT: [[TMP1766:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3525:%.*]] +// SIMD-ONLY0: cond.false3524: +// SIMD-ONLY0-NEXT: [[TMP1767:%.*]] = load i64, 
ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3525]] +// SIMD-ONLY0: cond.end3525: +// SIMD-ONLY0-NEXT: [[COND3526:%.*]] = phi i64 [ [[TMP1766]], [[COND_TRUE3523]] ], [ [[TMP1767]], [[COND_FALSE3524]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3526]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1768:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1769:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3527:%.*]] = icmp slt i64 [[TMP1768]], [[TMP1769]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3527]], label [[COND_TRUE3529:%.*]], label [[COND_FALSE3530:%.*]] +// SIMD-ONLY0: cond.true3529: +// SIMD-ONLY0-NEXT: [[TMP1770:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3531:%.*]] +// SIMD-ONLY0: cond.false3530: +// SIMD-ONLY0-NEXT: [[TMP1771:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3531]] +// SIMD-ONLY0: cond.end3531: +// SIMD-ONLY0-NEXT: [[COND3532:%.*]] = phi i64 [ [[TMP1770]], [[COND_TRUE3529]] ], [ [[TMP1771]], [[COND_FALSE3530]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3532]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1772:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1773:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3533:%.*]] = icmp sgt i64 [[TMP1772]], [[TMP1773]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3533]], label [[COND_TRUE3535:%.*]], label [[COND_FALSE3536:%.*]] +// SIMD-ONLY0: cond.true3535: +// SIMD-ONLY0-NEXT: [[TMP1774:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3537:%.*]] +// SIMD-ONLY0: cond.false3536: +// SIMD-ONLY0-NEXT: [[TMP1775:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3537]] +// SIMD-ONLY0: cond.end3537: +// SIMD-ONLY0-NEXT: [[COND3538:%.*]] = phi i64 [ [[TMP1774]], [[COND_TRUE3535]] ], [ [[TMP1775]], [[COND_FALSE3536]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3538]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1776:%.*]] = load i64, ptr [[LE]], 
align 8 +// SIMD-ONLY0-NEXT: [[TMP1777:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3539:%.*]] = icmp slt i64 [[TMP1776]], [[TMP1777]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3539]], label [[COND_TRUE3541:%.*]], label [[COND_FALSE3542:%.*]] +// SIMD-ONLY0: cond.true3541: +// SIMD-ONLY0-NEXT: [[TMP1778:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3543:%.*]] +// SIMD-ONLY0: cond.false3542: +// SIMD-ONLY0-NEXT: [[TMP1779:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3543]] +// SIMD-ONLY0: cond.end3543: +// SIMD-ONLY0-NEXT: [[COND3544:%.*]] = phi i64 [ [[TMP1778]], [[COND_TRUE3541]] ], [ [[TMP1779]], [[COND_FALSE3542]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3544]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1780:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1781:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3545:%.*]] = icmp sgt i64 [[TMP1780]], [[TMP1781]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3545]], label [[IF_THEN3547:%.*]], label [[IF_END3548:%.*]] +// SIMD-ONLY0: if.then3547: +// SIMD-ONLY0-NEXT: [[TMP1782:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1782]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3548]] +// SIMD-ONLY0: if.end3548: +// SIMD-ONLY0-NEXT: [[TMP1783:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1784:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3549:%.*]] = icmp slt i64 [[TMP1783]], [[TMP1784]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3549]], label [[IF_THEN3551:%.*]], label [[IF_END3552:%.*]] +// SIMD-ONLY0: if.then3551: +// SIMD-ONLY0-NEXT: [[TMP1785:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1785]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3552]] +// SIMD-ONLY0: if.end3552: +// SIMD-ONLY0-NEXT: [[TMP1786:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1787:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP3553:%.*]] = icmp sgt i64 [[TMP1786]], [[TMP1787]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3553]], label [[IF_THEN3555:%.*]], label [[IF_END3556:%.*]] +// SIMD-ONLY0: if.then3555: +// SIMD-ONLY0-NEXT: [[TMP1788:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1788]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3556]] +// SIMD-ONLY0: if.end3556: +// SIMD-ONLY0-NEXT: [[TMP1789:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1790:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3557:%.*]] = icmp slt i64 [[TMP1789]], [[TMP1790]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3557]], label [[IF_THEN3559:%.*]], label [[IF_END3560:%.*]] +// SIMD-ONLY0: if.then3559: +// SIMD-ONLY0-NEXT: [[TMP1791:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1791]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3560]] +// SIMD-ONLY0: if.end3560: +// SIMD-ONLY0-NEXT: [[TMP1792:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1793:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3561:%.*]] = icmp eq i64 [[TMP1792]], [[TMP1793]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3561]], label [[COND_TRUE3563:%.*]], label [[COND_FALSE3564:%.*]] +// SIMD-ONLY0: cond.true3563: +// SIMD-ONLY0-NEXT: [[TMP1794:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3565:%.*]] +// SIMD-ONLY0: cond.false3564: +// SIMD-ONLY0-NEXT: [[TMP1795:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3565]] +// SIMD-ONLY0: cond.end3565: +// SIMD-ONLY0-NEXT: [[COND3566:%.*]] = phi i64 [ [[TMP1794]], [[COND_TRUE3563]] ], [ [[TMP1795]], [[COND_FALSE3564]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3566]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1796:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1797:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3567:%.*]] = icmp eq i64 [[TMP1796]], [[TMP1797]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP3567]], label [[COND_TRUE3569:%.*]], label [[COND_FALSE3570:%.*]] +// SIMD-ONLY0: cond.true3569: +// SIMD-ONLY0-NEXT: [[TMP1798:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3571:%.*]] +// SIMD-ONLY0: cond.false3570: +// SIMD-ONLY0-NEXT: [[TMP1799:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3571]] +// SIMD-ONLY0: cond.end3571: +// SIMD-ONLY0-NEXT: [[COND3572:%.*]] = phi i64 [ [[TMP1798]], [[COND_TRUE3569]] ], [ [[TMP1799]], [[COND_FALSE3570]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3572]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1800:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1801:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3573:%.*]] = icmp eq i64 [[TMP1800]], [[TMP1801]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3573]], label [[IF_THEN3575:%.*]], label [[IF_END3576:%.*]] +// SIMD-ONLY0: if.then3575: +// SIMD-ONLY0-NEXT: [[TMP1802:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1802]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3576]] +// SIMD-ONLY0: if.end3576: +// SIMD-ONLY0-NEXT: [[TMP1803:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1804:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3577:%.*]] = icmp eq i64 [[TMP1803]], [[TMP1804]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3577]], label [[IF_THEN3579:%.*]], label [[IF_END3580:%.*]] +// SIMD-ONLY0: if.then3579: +// SIMD-ONLY0-NEXT: [[TMP1805:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1805]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3580]] +// SIMD-ONLY0: if.end3580: +// SIMD-ONLY0-NEXT: [[TMP1806:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1807:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3581:%.*]] = icmp ugt i64 [[TMP1806]], [[TMP1807]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3581]], label [[COND_TRUE3583:%.*]], label 
[[COND_FALSE3584:%.*]] +// SIMD-ONLY0: cond.true3583: +// SIMD-ONLY0-NEXT: [[TMP1808:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3585:%.*]] +// SIMD-ONLY0: cond.false3584: +// SIMD-ONLY0-NEXT: [[TMP1809:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3585]] +// SIMD-ONLY0: cond.end3585: +// SIMD-ONLY0-NEXT: [[COND3586:%.*]] = phi i64 [ [[TMP1808]], [[COND_TRUE3583]] ], [ [[TMP1809]], [[COND_FALSE3584]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3586]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1810:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1811:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3587:%.*]] = icmp ult i64 [[TMP1810]], [[TMP1811]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3587]], label [[COND_TRUE3589:%.*]], label [[COND_FALSE3590:%.*]] +// SIMD-ONLY0: cond.true3589: +// SIMD-ONLY0-NEXT: [[TMP1812:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3591:%.*]] +// SIMD-ONLY0: cond.false3590: +// SIMD-ONLY0-NEXT: [[TMP1813:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3591]] +// SIMD-ONLY0: cond.end3591: +// SIMD-ONLY0-NEXT: [[COND3592:%.*]] = phi i64 [ [[TMP1812]], [[COND_TRUE3589]] ], [ [[TMP1813]], [[COND_FALSE3590]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3592]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1814:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1815:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3593:%.*]] = icmp ugt i64 [[TMP1814]], [[TMP1815]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3593]], label [[COND_TRUE3595:%.*]], label [[COND_FALSE3596:%.*]] +// SIMD-ONLY0: cond.true3595: +// SIMD-ONLY0-NEXT: [[TMP1816:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3597:%.*]] +// SIMD-ONLY0: cond.false3596: +// SIMD-ONLY0-NEXT: [[TMP1817:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3597]] +// 
SIMD-ONLY0: cond.end3597: +// SIMD-ONLY0-NEXT: [[COND3598:%.*]] = phi i64 [ [[TMP1816]], [[COND_TRUE3595]] ], [ [[TMP1817]], [[COND_FALSE3596]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3598]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1818:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1819:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3599:%.*]] = icmp ult i64 [[TMP1818]], [[TMP1819]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3599]], label [[COND_TRUE3601:%.*]], label [[COND_FALSE3602:%.*]] +// SIMD-ONLY0: cond.true3601: +// SIMD-ONLY0-NEXT: [[TMP1820:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3603:%.*]] +// SIMD-ONLY0: cond.false3602: +// SIMD-ONLY0-NEXT: [[TMP1821:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3603]] +// SIMD-ONLY0: cond.end3603: +// SIMD-ONLY0-NEXT: [[COND3604:%.*]] = phi i64 [ [[TMP1820]], [[COND_TRUE3601]] ], [ [[TMP1821]], [[COND_FALSE3602]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3604]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1822:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1823:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3605:%.*]] = icmp ugt i64 [[TMP1822]], [[TMP1823]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3605]], label [[IF_THEN3607:%.*]], label [[IF_END3608:%.*]] +// SIMD-ONLY0: if.then3607: +// SIMD-ONLY0-NEXT: [[TMP1824:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1824]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3608]] +// SIMD-ONLY0: if.end3608: +// SIMD-ONLY0-NEXT: [[TMP1825:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1826:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3609:%.*]] = icmp ult i64 [[TMP1825]], [[TMP1826]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3609]], label [[IF_THEN3611:%.*]], label [[IF_END3612:%.*]] +// SIMD-ONLY0: if.then3611: +// SIMD-ONLY0-NEXT: [[TMP1827:%.*]] = load i64, ptr [[ULE]], align 8 
+// SIMD-ONLY0-NEXT: store i64 [[TMP1827]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3612]] +// SIMD-ONLY0: if.end3612: +// SIMD-ONLY0-NEXT: [[TMP1828:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1829:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3613:%.*]] = icmp ugt i64 [[TMP1828]], [[TMP1829]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3613]], label [[IF_THEN3615:%.*]], label [[IF_END3616:%.*]] +// SIMD-ONLY0: if.then3615: +// SIMD-ONLY0-NEXT: [[TMP1830:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1830]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3616]] +// SIMD-ONLY0: if.end3616: +// SIMD-ONLY0-NEXT: [[TMP1831:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1832:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3617:%.*]] = icmp ult i64 [[TMP1831]], [[TMP1832]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3617]], label [[IF_THEN3619:%.*]], label [[IF_END3620:%.*]] +// SIMD-ONLY0: if.then3619: +// SIMD-ONLY0-NEXT: [[TMP1833:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1833]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3620]] +// SIMD-ONLY0: if.end3620: +// SIMD-ONLY0-NEXT: [[TMP1834:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1835:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3621:%.*]] = icmp eq i64 [[TMP1834]], [[TMP1835]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3621]], label [[COND_TRUE3623:%.*]], label [[COND_FALSE3624:%.*]] +// SIMD-ONLY0: cond.true3623: +// SIMD-ONLY0-NEXT: [[TMP1836:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3625:%.*]] +// SIMD-ONLY0: cond.false3624: +// SIMD-ONLY0-NEXT: [[TMP1837:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3625]] +// SIMD-ONLY0: cond.end3625: +// SIMD-ONLY0-NEXT: [[COND3626:%.*]] = phi i64 [ [[TMP1836]], [[COND_TRUE3623]] ], [ [[TMP1837]], [[COND_FALSE3624]] ] +// 
SIMD-ONLY0-NEXT: store i64 [[COND3626]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1838:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1839:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3627:%.*]] = icmp eq i64 [[TMP1838]], [[TMP1839]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3627]], label [[COND_TRUE3629:%.*]], label [[COND_FALSE3630:%.*]] +// SIMD-ONLY0: cond.true3629: +// SIMD-ONLY0-NEXT: [[TMP1840:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3631:%.*]] +// SIMD-ONLY0: cond.false3630: +// SIMD-ONLY0-NEXT: [[TMP1841:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3631]] +// SIMD-ONLY0: cond.end3631: +// SIMD-ONLY0-NEXT: [[COND3632:%.*]] = phi i64 [ [[TMP1840]], [[COND_TRUE3629]] ], [ [[TMP1841]], [[COND_FALSE3630]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3632]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1842:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1843:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3633:%.*]] = icmp eq i64 [[TMP1842]], [[TMP1843]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3633]], label [[IF_THEN3635:%.*]], label [[IF_END3636:%.*]] +// SIMD-ONLY0: if.then3635: +// SIMD-ONLY0-NEXT: [[TMP1844:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1844]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3636]] +// SIMD-ONLY0: if.end3636: +// SIMD-ONLY0-NEXT: [[TMP1845:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1846:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3637:%.*]] = icmp eq i64 [[TMP1845]], [[TMP1846]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3637]], label [[IF_THEN3639:%.*]], label [[IF_END3640:%.*]] +// SIMD-ONLY0: if.then3639: +// SIMD-ONLY0-NEXT: [[TMP1847:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1847]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3640]] +// SIMD-ONLY0: if.end3640: +// 
SIMD-ONLY0-NEXT: [[TMP1848:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1849:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3641:%.*]] = icmp sgt i64 [[TMP1848]], [[TMP1849]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3641]], label [[COND_TRUE3643:%.*]], label [[COND_FALSE3644:%.*]] +// SIMD-ONLY0: cond.true3643: +// SIMD-ONLY0-NEXT: [[TMP1850:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3645:%.*]] +// SIMD-ONLY0: cond.false3644: +// SIMD-ONLY0-NEXT: [[TMP1851:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3645]] +// SIMD-ONLY0: cond.end3645: +// SIMD-ONLY0-NEXT: [[COND3646:%.*]] = phi i64 [ [[TMP1850]], [[COND_TRUE3643]] ], [ [[TMP1851]], [[COND_FALSE3644]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3646]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1852:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1853:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3647:%.*]] = icmp slt i64 [[TMP1852]], [[TMP1853]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3647]], label [[COND_TRUE3649:%.*]], label [[COND_FALSE3650:%.*]] +// SIMD-ONLY0: cond.true3649: +// SIMD-ONLY0-NEXT: [[TMP1854:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3651:%.*]] +// SIMD-ONLY0: cond.false3650: +// SIMD-ONLY0-NEXT: [[TMP1855:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3651]] +// SIMD-ONLY0: cond.end3651: +// SIMD-ONLY0-NEXT: [[COND3652:%.*]] = phi i64 [ [[TMP1854]], [[COND_TRUE3649]] ], [ [[TMP1855]], [[COND_FALSE3650]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3652]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1856:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1857:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3653:%.*]] = icmp sgt i64 [[TMP1856]], [[TMP1857]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3653]], label [[COND_TRUE3655:%.*]], label [[COND_FALSE3656:%.*]] +// SIMD-ONLY0: cond.true3655: +// 
SIMD-ONLY0-NEXT: [[TMP1858:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3657:%.*]] +// SIMD-ONLY0: cond.false3656: +// SIMD-ONLY0-NEXT: [[TMP1859:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3657]] +// SIMD-ONLY0: cond.end3657: +// SIMD-ONLY0-NEXT: [[COND3658:%.*]] = phi i64 [ [[TMP1858]], [[COND_TRUE3655]] ], [ [[TMP1859]], [[COND_FALSE3656]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3658]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1860:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1861:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3659:%.*]] = icmp slt i64 [[TMP1860]], [[TMP1861]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3659]], label [[COND_TRUE3661:%.*]], label [[COND_FALSE3662:%.*]] +// SIMD-ONLY0: cond.true3661: +// SIMD-ONLY0-NEXT: [[TMP1862:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3663:%.*]] +// SIMD-ONLY0: cond.false3662: +// SIMD-ONLY0-NEXT: [[TMP1863:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3663]] +// SIMD-ONLY0: cond.end3663: +// SIMD-ONLY0-NEXT: [[COND3664:%.*]] = phi i64 [ [[TMP1862]], [[COND_TRUE3661]] ], [ [[TMP1863]], [[COND_FALSE3662]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3664]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1864:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1865:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3665:%.*]] = icmp sgt i64 [[TMP1864]], [[TMP1865]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3665]], label [[IF_THEN3667:%.*]], label [[IF_END3668:%.*]] +// SIMD-ONLY0: if.then3667: +// SIMD-ONLY0-NEXT: [[TMP1866:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1866]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3668]] +// SIMD-ONLY0: if.end3668: +// SIMD-ONLY0-NEXT: [[TMP1867:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1868:%.*]] = load i64, ptr [[LE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP3669:%.*]] = icmp slt i64 [[TMP1867]], [[TMP1868]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3669]], label [[IF_THEN3671:%.*]], label [[IF_END3672:%.*]] +// SIMD-ONLY0: if.then3671: +// SIMD-ONLY0-NEXT: [[TMP1869:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1869]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3672]] +// SIMD-ONLY0: if.end3672: +// SIMD-ONLY0-NEXT: [[TMP1870:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1871:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3673:%.*]] = icmp sgt i64 [[TMP1870]], [[TMP1871]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3673]], label [[IF_THEN3675:%.*]], label [[IF_END3676:%.*]] +// SIMD-ONLY0: if.then3675: +// SIMD-ONLY0-NEXT: [[TMP1872:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1872]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3676]] +// SIMD-ONLY0: if.end3676: +// SIMD-ONLY0-NEXT: [[TMP1873:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1874:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3677:%.*]] = icmp slt i64 [[TMP1873]], [[TMP1874]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3677]], label [[IF_THEN3679:%.*]], label [[IF_END3680:%.*]] +// SIMD-ONLY0: if.then3679: +// SIMD-ONLY0-NEXT: [[TMP1875:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1875]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3680]] +// SIMD-ONLY0: if.end3680: +// SIMD-ONLY0-NEXT: [[TMP1876:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1877:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3681:%.*]] = icmp eq i64 [[TMP1876]], [[TMP1877]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3681]], label [[COND_TRUE3683:%.*]], label [[COND_FALSE3684:%.*]] +// SIMD-ONLY0: cond.true3683: +// SIMD-ONLY0-NEXT: [[TMP1878:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3685:%.*]] +// SIMD-ONLY0: cond.false3684: +// SIMD-ONLY0-NEXT: 
[[TMP1879:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3685]] +// SIMD-ONLY0: cond.end3685: +// SIMD-ONLY0-NEXT: [[COND3686:%.*]] = phi i64 [ [[TMP1878]], [[COND_TRUE3683]] ], [ [[TMP1879]], [[COND_FALSE3684]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3686]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1880:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1881:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3687:%.*]] = icmp eq i64 [[TMP1880]], [[TMP1881]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3687]], label [[COND_TRUE3689:%.*]], label [[COND_FALSE3690:%.*]] +// SIMD-ONLY0: cond.true3689: +// SIMD-ONLY0-NEXT: [[TMP1882:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3691:%.*]] +// SIMD-ONLY0: cond.false3690: +// SIMD-ONLY0-NEXT: [[TMP1883:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3691]] +// SIMD-ONLY0: cond.end3691: +// SIMD-ONLY0-NEXT: [[COND3692:%.*]] = phi i64 [ [[TMP1882]], [[COND_TRUE3689]] ], [ [[TMP1883]], [[COND_FALSE3690]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3692]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1884:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1885:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3693:%.*]] = icmp eq i64 [[TMP1884]], [[TMP1885]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3693]], label [[IF_THEN3695:%.*]], label [[IF_END3696:%.*]] +// SIMD-ONLY0: if.then3695: +// SIMD-ONLY0-NEXT: [[TMP1886:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1886]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3696]] +// SIMD-ONLY0: if.end3696: +// SIMD-ONLY0-NEXT: [[TMP1887:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1888:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3697:%.*]] = icmp eq i64 [[TMP1887]], [[TMP1888]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3697]], label [[IF_THEN3699:%.*]], label [[IF_END3700:%.*]] +// SIMD-ONLY0: 
if.then3699: +// SIMD-ONLY0-NEXT: [[TMP1889:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1889]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3700]] +// SIMD-ONLY0: if.end3700: +// SIMD-ONLY0-NEXT: [[TMP1890:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1891:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3701:%.*]] = icmp ugt i64 [[TMP1890]], [[TMP1891]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3701]], label [[COND_TRUE3703:%.*]], label [[COND_FALSE3704:%.*]] +// SIMD-ONLY0: cond.true3703: +// SIMD-ONLY0-NEXT: [[TMP1892:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3705:%.*]] +// SIMD-ONLY0: cond.false3704: +// SIMD-ONLY0-NEXT: [[TMP1893:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3705]] +// SIMD-ONLY0: cond.end3705: +// SIMD-ONLY0-NEXT: [[COND3706:%.*]] = phi i64 [ [[TMP1892]], [[COND_TRUE3703]] ], [ [[TMP1893]], [[COND_FALSE3704]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3706]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1894:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1895:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3707:%.*]] = icmp ult i64 [[TMP1894]], [[TMP1895]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3707]], label [[COND_TRUE3709:%.*]], label [[COND_FALSE3710:%.*]] +// SIMD-ONLY0: cond.true3709: +// SIMD-ONLY0-NEXT: [[TMP1896:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3711:%.*]] +// SIMD-ONLY0: cond.false3710: +// SIMD-ONLY0-NEXT: [[TMP1897:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3711]] +// SIMD-ONLY0: cond.end3711: +// SIMD-ONLY0-NEXT: [[COND3712:%.*]] = phi i64 [ [[TMP1896]], [[COND_TRUE3709]] ], [ [[TMP1897]], [[COND_FALSE3710]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3712]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1898:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1899:%.*]] = load 
i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3713:%.*]] = icmp ugt i64 [[TMP1898]], [[TMP1899]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3713]], label [[COND_TRUE3715:%.*]], label [[COND_FALSE3716:%.*]] +// SIMD-ONLY0: cond.true3715: +// SIMD-ONLY0-NEXT: [[TMP1900:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3717:%.*]] +// SIMD-ONLY0: cond.false3716: +// SIMD-ONLY0-NEXT: [[TMP1901:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3717]] +// SIMD-ONLY0: cond.end3717: +// SIMD-ONLY0-NEXT: [[COND3718:%.*]] = phi i64 [ [[TMP1900]], [[COND_TRUE3715]] ], [ [[TMP1901]], [[COND_FALSE3716]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3718]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1902:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1903:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3719:%.*]] = icmp ult i64 [[TMP1902]], [[TMP1903]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3719]], label [[COND_TRUE3721:%.*]], label [[COND_FALSE3722:%.*]] +// SIMD-ONLY0: cond.true3721: +// SIMD-ONLY0-NEXT: [[TMP1904:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3723:%.*]] +// SIMD-ONLY0: cond.false3722: +// SIMD-ONLY0-NEXT: [[TMP1905:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3723]] +// SIMD-ONLY0: cond.end3723: +// SIMD-ONLY0-NEXT: [[COND3724:%.*]] = phi i64 [ [[TMP1904]], [[COND_TRUE3721]] ], [ [[TMP1905]], [[COND_FALSE3722]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3724]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1906:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1907:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3725:%.*]] = icmp ugt i64 [[TMP1906]], [[TMP1907]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3725]], label [[IF_THEN3727:%.*]], label [[IF_END3728:%.*]] +// SIMD-ONLY0: if.then3727: +// SIMD-ONLY0-NEXT: [[TMP1908:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP1908]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3728]] +// SIMD-ONLY0: if.end3728: +// SIMD-ONLY0-NEXT: [[TMP1909:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1910:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3729:%.*]] = icmp ult i64 [[TMP1909]], [[TMP1910]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3729]], label [[IF_THEN3731:%.*]], label [[IF_END3732:%.*]] +// SIMD-ONLY0: if.then3731: +// SIMD-ONLY0-NEXT: [[TMP1911:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1911]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3732]] +// SIMD-ONLY0: if.end3732: +// SIMD-ONLY0-NEXT: [[TMP1912:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1913:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3733:%.*]] = icmp ugt i64 [[TMP1912]], [[TMP1913]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3733]], label [[IF_THEN3735:%.*]], label [[IF_END3736:%.*]] +// SIMD-ONLY0: if.then3735: +// SIMD-ONLY0-NEXT: [[TMP1914:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1914]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3736]] +// SIMD-ONLY0: if.end3736: +// SIMD-ONLY0-NEXT: [[TMP1915:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1916:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3737:%.*]] = icmp ult i64 [[TMP1915]], [[TMP1916]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3737]], label [[IF_THEN3739:%.*]], label [[IF_END3740:%.*]] +// SIMD-ONLY0: if.then3739: +// SIMD-ONLY0-NEXT: [[TMP1917:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1917]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3740]] +// SIMD-ONLY0: if.end3740: +// SIMD-ONLY0-NEXT: [[TMP1918:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1919:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3741:%.*]] = icmp eq i64 [[TMP1918]], [[TMP1919]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP3741]], label [[COND_TRUE3743:%.*]], label [[COND_FALSE3744:%.*]] +// SIMD-ONLY0: cond.true3743: +// SIMD-ONLY0-NEXT: [[TMP1920:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3745:%.*]] +// SIMD-ONLY0: cond.false3744: +// SIMD-ONLY0-NEXT: [[TMP1921:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3745]] +// SIMD-ONLY0: cond.end3745: +// SIMD-ONLY0-NEXT: [[COND3746:%.*]] = phi i64 [ [[TMP1920]], [[COND_TRUE3743]] ], [ [[TMP1921]], [[COND_FALSE3744]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3746]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1922:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1923:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3747:%.*]] = icmp eq i64 [[TMP1922]], [[TMP1923]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3747]], label [[COND_TRUE3749:%.*]], label [[COND_FALSE3750:%.*]] +// SIMD-ONLY0: cond.true3749: +// SIMD-ONLY0-NEXT: [[TMP1924:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3751:%.*]] +// SIMD-ONLY0: cond.false3750: +// SIMD-ONLY0-NEXT: [[TMP1925:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3751]] +// SIMD-ONLY0: cond.end3751: +// SIMD-ONLY0-NEXT: [[COND3752:%.*]] = phi i64 [ [[TMP1924]], [[COND_TRUE3749]] ], [ [[TMP1925]], [[COND_FALSE3750]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3752]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1926:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1927:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3753:%.*]] = icmp eq i64 [[TMP1926]], [[TMP1927]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3753]], label [[IF_THEN3755:%.*]], label [[IF_END3756:%.*]] +// SIMD-ONLY0: if.then3755: +// SIMD-ONLY0-NEXT: [[TMP1928:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1928]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3756]] +// SIMD-ONLY0: if.end3756: +// SIMD-ONLY0-NEXT: 
[[TMP1929:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1930:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3757:%.*]] = icmp eq i64 [[TMP1929]], [[TMP1930]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3757]], label [[IF_THEN3759:%.*]], label [[IF_END3760:%.*]] +// SIMD-ONLY0: if.then3759: +// SIMD-ONLY0-NEXT: [[TMP1931:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1931]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3760]] +// SIMD-ONLY0: if.end3760: +// SIMD-ONLY0-NEXT: [[TMP1932:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1933:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3761:%.*]] = icmp sgt i64 [[TMP1932]], [[TMP1933]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3761]], label [[COND_TRUE3763:%.*]], label [[COND_FALSE3764:%.*]] +// SIMD-ONLY0: cond.true3763: +// SIMD-ONLY0-NEXT: [[TMP1934:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3765:%.*]] +// SIMD-ONLY0: cond.false3764: +// SIMD-ONLY0-NEXT: [[TMP1935:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3765]] +// SIMD-ONLY0: cond.end3765: +// SIMD-ONLY0-NEXT: [[COND3766:%.*]] = phi i64 [ [[TMP1934]], [[COND_TRUE3763]] ], [ [[TMP1935]], [[COND_FALSE3764]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3766]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1936:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1937:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3767:%.*]] = icmp slt i64 [[TMP1936]], [[TMP1937]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3767]], label [[COND_TRUE3769:%.*]], label [[COND_FALSE3770:%.*]] +// SIMD-ONLY0: cond.true3769: +// SIMD-ONLY0-NEXT: [[TMP1938:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3771:%.*]] +// SIMD-ONLY0: cond.false3770: +// SIMD-ONLY0-NEXT: [[TMP1939:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3771]] +// SIMD-ONLY0: cond.end3771: +// 
SIMD-ONLY0-NEXT: [[COND3772:%.*]] = phi i64 [ [[TMP1938]], [[COND_TRUE3769]] ], [ [[TMP1939]], [[COND_FALSE3770]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3772]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1940:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1941:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3773:%.*]] = icmp sgt i64 [[TMP1940]], [[TMP1941]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3773]], label [[COND_TRUE3775:%.*]], label [[COND_FALSE3776:%.*]] +// SIMD-ONLY0: cond.true3775: +// SIMD-ONLY0-NEXT: [[TMP1942:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3777:%.*]] +// SIMD-ONLY0: cond.false3776: +// SIMD-ONLY0-NEXT: [[TMP1943:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3777]] +// SIMD-ONLY0: cond.end3777: +// SIMD-ONLY0-NEXT: [[COND3778:%.*]] = phi i64 [ [[TMP1942]], [[COND_TRUE3775]] ], [ [[TMP1943]], [[COND_FALSE3776]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3778]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1944:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1945:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3779:%.*]] = icmp slt i64 [[TMP1944]], [[TMP1945]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3779]], label [[COND_TRUE3781:%.*]], label [[COND_FALSE3782:%.*]] +// SIMD-ONLY0: cond.true3781: +// SIMD-ONLY0-NEXT: [[TMP1946:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3783:%.*]] +// SIMD-ONLY0: cond.false3782: +// SIMD-ONLY0-NEXT: [[TMP1947:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3783]] +// SIMD-ONLY0: cond.end3783: +// SIMD-ONLY0-NEXT: [[COND3784:%.*]] = phi i64 [ [[TMP1946]], [[COND_TRUE3781]] ], [ [[TMP1947]], [[COND_FALSE3782]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3784]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1948:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1949:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[CMP3785:%.*]] = icmp sgt i64 [[TMP1948]], [[TMP1949]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3785]], label [[IF_THEN3787:%.*]], label [[IF_END3788:%.*]] +// SIMD-ONLY0: if.then3787: +// SIMD-ONLY0-NEXT: [[TMP1950:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1950]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3788]] +// SIMD-ONLY0: if.end3788: +// SIMD-ONLY0-NEXT: [[TMP1951:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1952:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3789:%.*]] = icmp slt i64 [[TMP1951]], [[TMP1952]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3789]], label [[IF_THEN3791:%.*]], label [[IF_END3792:%.*]] +// SIMD-ONLY0: if.then3791: +// SIMD-ONLY0-NEXT: [[TMP1953:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1953]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3792]] +// SIMD-ONLY0: if.end3792: +// SIMD-ONLY0-NEXT: [[TMP1954:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1955:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3793:%.*]] = icmp sgt i64 [[TMP1954]], [[TMP1955]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3793]], label [[IF_THEN3795:%.*]], label [[IF_END3796:%.*]] +// SIMD-ONLY0: if.then3795: +// SIMD-ONLY0-NEXT: [[TMP1956:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1956]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3796]] +// SIMD-ONLY0: if.end3796: +// SIMD-ONLY0-NEXT: [[TMP1957:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1958:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3797:%.*]] = icmp slt i64 [[TMP1957]], [[TMP1958]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3797]], label [[IF_THEN3799:%.*]], label [[IF_END3800:%.*]] +// SIMD-ONLY0: if.then3799: +// SIMD-ONLY0-NEXT: [[TMP1959:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1959]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3800]] +// SIMD-ONLY0: 
if.end3800: +// SIMD-ONLY0-NEXT: [[TMP1960:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1961:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3801:%.*]] = icmp eq i64 [[TMP1960]], [[TMP1961]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3801]], label [[COND_TRUE3803:%.*]], label [[COND_FALSE3804:%.*]] +// SIMD-ONLY0: cond.true3803: +// SIMD-ONLY0-NEXT: [[TMP1962:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3805:%.*]] +// SIMD-ONLY0: cond.false3804: +// SIMD-ONLY0-NEXT: [[TMP1963:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3805]] +// SIMD-ONLY0: cond.end3805: +// SIMD-ONLY0-NEXT: [[COND3806:%.*]] = phi i64 [ [[TMP1962]], [[COND_TRUE3803]] ], [ [[TMP1963]], [[COND_FALSE3804]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3806]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1964:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1965:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3807:%.*]] = icmp eq i64 [[TMP1964]], [[TMP1965]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3807]], label [[COND_TRUE3809:%.*]], label [[COND_FALSE3810:%.*]] +// SIMD-ONLY0: cond.true3809: +// SIMD-ONLY0-NEXT: [[TMP1966:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3811:%.*]] +// SIMD-ONLY0: cond.false3810: +// SIMD-ONLY0-NEXT: [[TMP1967:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3811]] +// SIMD-ONLY0: cond.end3811: +// SIMD-ONLY0-NEXT: [[COND3812:%.*]] = phi i64 [ [[TMP1966]], [[COND_TRUE3809]] ], [ [[TMP1967]], [[COND_FALSE3810]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3812]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1968:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1969:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3813:%.*]] = icmp eq i64 [[TMP1968]], [[TMP1969]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3813]], label [[IF_THEN3815:%.*]], label [[IF_END3816:%.*]] +// SIMD-ONLY0: if.then3815: 
+// SIMD-ONLY0-NEXT: [[TMP1970:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1970]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3816]] +// SIMD-ONLY0: if.end3816: +// SIMD-ONLY0-NEXT: [[TMP1971:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1972:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3817:%.*]] = icmp eq i64 [[TMP1971]], [[TMP1972]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3817]], label [[IF_THEN3819:%.*]], label [[IF_END3820:%.*]] +// SIMD-ONLY0: if.then3819: +// SIMD-ONLY0-NEXT: [[TMP1973:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1973]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3820]] +// SIMD-ONLY0: if.end3820: +// SIMD-ONLY0-NEXT: [[TMP1974:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1975:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3821:%.*]] = icmp ugt i64 [[TMP1974]], [[TMP1975]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3821]], label [[COND_TRUE3823:%.*]], label [[COND_FALSE3824:%.*]] +// SIMD-ONLY0: cond.true3823: +// SIMD-ONLY0-NEXT: [[TMP1976:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3825:%.*]] +// SIMD-ONLY0: cond.false3824: +// SIMD-ONLY0-NEXT: [[TMP1977:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3825]] +// SIMD-ONLY0: cond.end3825: +// SIMD-ONLY0-NEXT: [[COND3826:%.*]] = phi i64 [ [[TMP1976]], [[COND_TRUE3823]] ], [ [[TMP1977]], [[COND_FALSE3824]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3826]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1978:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1979:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3827:%.*]] = icmp ult i64 [[TMP1978]], [[TMP1979]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3827]], label [[COND_TRUE3829:%.*]], label [[COND_FALSE3830:%.*]] +// SIMD-ONLY0: cond.true3829: +// SIMD-ONLY0-NEXT: [[TMP1980:%.*]] = load i64, ptr [[ULE]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[COND_END3831:%.*]] +// SIMD-ONLY0: cond.false3830: +// SIMD-ONLY0-NEXT: [[TMP1981:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3831]] +// SIMD-ONLY0: cond.end3831: +// SIMD-ONLY0-NEXT: [[COND3832:%.*]] = phi i64 [ [[TMP1980]], [[COND_TRUE3829]] ], [ [[TMP1981]], [[COND_FALSE3830]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3832]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1982:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1983:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3833:%.*]] = icmp ugt i64 [[TMP1982]], [[TMP1983]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3833]], label [[COND_TRUE3835:%.*]], label [[COND_FALSE3836:%.*]] +// SIMD-ONLY0: cond.true3835: +// SIMD-ONLY0-NEXT: [[TMP1984:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3837:%.*]] +// SIMD-ONLY0: cond.false3836: +// SIMD-ONLY0-NEXT: [[TMP1985:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3837]] +// SIMD-ONLY0: cond.end3837: +// SIMD-ONLY0-NEXT: [[COND3838:%.*]] = phi i64 [ [[TMP1984]], [[COND_TRUE3835]] ], [ [[TMP1985]], [[COND_FALSE3836]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3838]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1986:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1987:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3839:%.*]] = icmp ult i64 [[TMP1986]], [[TMP1987]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3839]], label [[COND_TRUE3841:%.*]], label [[COND_FALSE3842:%.*]] +// SIMD-ONLY0: cond.true3841: +// SIMD-ONLY0-NEXT: [[TMP1988:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3843:%.*]] +// SIMD-ONLY0: cond.false3842: +// SIMD-ONLY0-NEXT: [[TMP1989:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3843]] +// SIMD-ONLY0: cond.end3843: +// SIMD-ONLY0-NEXT: [[COND3844:%.*]] = phi i64 [ [[TMP1988]], [[COND_TRUE3841]] ], [ [[TMP1989]], 
[[COND_FALSE3842]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3844]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1990:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1991:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3845:%.*]] = icmp ugt i64 [[TMP1990]], [[TMP1991]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3845]], label [[IF_THEN3847:%.*]], label [[IF_END3848:%.*]] +// SIMD-ONLY0: if.then3847: +// SIMD-ONLY0-NEXT: [[TMP1992:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1992]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3848]] +// SIMD-ONLY0: if.end3848: +// SIMD-ONLY0-NEXT: [[TMP1993:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1994:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3849:%.*]] = icmp ult i64 [[TMP1993]], [[TMP1994]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3849]], label [[IF_THEN3851:%.*]], label [[IF_END3852:%.*]] +// SIMD-ONLY0: if.then3851: +// SIMD-ONLY0-NEXT: [[TMP1995:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1995]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3852]] +// SIMD-ONLY0: if.end3852: +// SIMD-ONLY0-NEXT: [[TMP1996:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1997:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3853:%.*]] = icmp ugt i64 [[TMP1996]], [[TMP1997]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3853]], label [[IF_THEN3855:%.*]], label [[IF_END3856:%.*]] +// SIMD-ONLY0: if.then3855: +// SIMD-ONLY0-NEXT: [[TMP1998:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP1998]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3856]] +// SIMD-ONLY0: if.end3856: +// SIMD-ONLY0-NEXT: [[TMP1999:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2000:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3857:%.*]] = icmp ult i64 [[TMP1999]], [[TMP2000]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3857]], label 
[[IF_THEN3859:%.*]], label [[IF_END3860:%.*]] +// SIMD-ONLY0: if.then3859: +// SIMD-ONLY0-NEXT: [[TMP2001:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2001]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3860]] +// SIMD-ONLY0: if.end3860: +// SIMD-ONLY0-NEXT: [[TMP2002:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2003:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3861:%.*]] = icmp eq i64 [[TMP2002]], [[TMP2003]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3861]], label [[COND_TRUE3863:%.*]], label [[COND_FALSE3864:%.*]] +// SIMD-ONLY0: cond.true3863: +// SIMD-ONLY0-NEXT: [[TMP2004:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3865:%.*]] +// SIMD-ONLY0: cond.false3864: +// SIMD-ONLY0-NEXT: [[TMP2005:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3865]] +// SIMD-ONLY0: cond.end3865: +// SIMD-ONLY0-NEXT: [[COND3866:%.*]] = phi i64 [ [[TMP2004]], [[COND_TRUE3863]] ], [ [[TMP2005]], [[COND_FALSE3864]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3866]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2006:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2007:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3867:%.*]] = icmp eq i64 [[TMP2006]], [[TMP2007]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3867]], label [[COND_TRUE3869:%.*]], label [[COND_FALSE3870:%.*]] +// SIMD-ONLY0: cond.true3869: +// SIMD-ONLY0-NEXT: [[TMP2008:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3871:%.*]] +// SIMD-ONLY0: cond.false3870: +// SIMD-ONLY0-NEXT: [[TMP2009:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3871]] +// SIMD-ONLY0: cond.end3871: +// SIMD-ONLY0-NEXT: [[COND3872:%.*]] = phi i64 [ [[TMP2008]], [[COND_TRUE3869]] ], [ [[TMP2009]], [[COND_FALSE3870]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3872]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2010:%.*]] = load i64, ptr 
[[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2011:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3873:%.*]] = icmp eq i64 [[TMP2010]], [[TMP2011]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3873]], label [[IF_THEN3875:%.*]], label [[IF_END3876:%.*]] +// SIMD-ONLY0: if.then3875: +// SIMD-ONLY0-NEXT: [[TMP2012:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2012]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3876]] +// SIMD-ONLY0: if.end3876: +// SIMD-ONLY0-NEXT: [[TMP2013:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2014:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3877:%.*]] = icmp eq i64 [[TMP2013]], [[TMP2014]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3877]], label [[IF_THEN3879:%.*]], label [[IF_END3880:%.*]] +// SIMD-ONLY0: if.then3879: +// SIMD-ONLY0-NEXT: [[TMP2015:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2015]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3880]] +// SIMD-ONLY0: if.end3880: +// SIMD-ONLY0-NEXT: [[TMP2016:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2017:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3881:%.*]] = icmp sgt i64 [[TMP2016]], [[TMP2017]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3881]], label [[COND_TRUE3883:%.*]], label [[COND_FALSE3884:%.*]] +// SIMD-ONLY0: cond.true3883: +// SIMD-ONLY0-NEXT: [[TMP2018:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3885:%.*]] +// SIMD-ONLY0: cond.false3884: +// SIMD-ONLY0-NEXT: [[TMP2019:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3885]] +// SIMD-ONLY0: cond.end3885: +// SIMD-ONLY0-NEXT: [[COND3886:%.*]] = phi i64 [ [[TMP2018]], [[COND_TRUE3883]] ], [ [[TMP2019]], [[COND_FALSE3884]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3886]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2020:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2021:%.*]] = load i64, ptr 
[[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3887:%.*]] = icmp slt i64 [[TMP2020]], [[TMP2021]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3887]], label [[COND_TRUE3889:%.*]], label [[COND_FALSE3890:%.*]] +// SIMD-ONLY0: cond.true3889: +// SIMD-ONLY0-NEXT: [[TMP2022:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3891:%.*]] +// SIMD-ONLY0: cond.false3890: +// SIMD-ONLY0-NEXT: [[TMP2023:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3891]] +// SIMD-ONLY0: cond.end3891: +// SIMD-ONLY0-NEXT: [[COND3892:%.*]] = phi i64 [ [[TMP2022]], [[COND_TRUE3889]] ], [ [[TMP2023]], [[COND_FALSE3890]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3892]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2024:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2025:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3893:%.*]] = icmp sgt i64 [[TMP2024]], [[TMP2025]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3893]], label [[COND_TRUE3895:%.*]], label [[COND_FALSE3896:%.*]] +// SIMD-ONLY0: cond.true3895: +// SIMD-ONLY0-NEXT: [[TMP2026:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3897:%.*]] +// SIMD-ONLY0: cond.false3896: +// SIMD-ONLY0-NEXT: [[TMP2027:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3897]] +// SIMD-ONLY0: cond.end3897: +// SIMD-ONLY0-NEXT: [[COND3898:%.*]] = phi i64 [ [[TMP2026]], [[COND_TRUE3895]] ], [ [[TMP2027]], [[COND_FALSE3896]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3898]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2028:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2029:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3899:%.*]] = icmp slt i64 [[TMP2028]], [[TMP2029]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3899]], label [[COND_TRUE3901:%.*]], label [[COND_FALSE3902:%.*]] +// SIMD-ONLY0: cond.true3901: +// SIMD-ONLY0-NEXT: [[TMP2030:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END3903:%.*]] +// SIMD-ONLY0: cond.false3902: +// SIMD-ONLY0-NEXT: [[TMP2031:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3903]] +// SIMD-ONLY0: cond.end3903: +// SIMD-ONLY0-NEXT: [[COND3904:%.*]] = phi i64 [ [[TMP2030]], [[COND_TRUE3901]] ], [ [[TMP2031]], [[COND_FALSE3902]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3904]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2032:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2033:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3905:%.*]] = icmp sgt i64 [[TMP2032]], [[TMP2033]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3905]], label [[IF_THEN3907:%.*]], label [[IF_END3908:%.*]] +// SIMD-ONLY0: if.then3907: +// SIMD-ONLY0-NEXT: [[TMP2034:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2034]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3908]] +// SIMD-ONLY0: if.end3908: +// SIMD-ONLY0-NEXT: [[TMP2035:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2036:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3909:%.*]] = icmp slt i64 [[TMP2035]], [[TMP2036]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3909]], label [[IF_THEN3911:%.*]], label [[IF_END3912:%.*]] +// SIMD-ONLY0: if.then3911: +// SIMD-ONLY0-NEXT: [[TMP2037:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2037]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3912]] +// SIMD-ONLY0: if.end3912: +// SIMD-ONLY0-NEXT: [[TMP2038:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2039:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3913:%.*]] = icmp sgt i64 [[TMP2038]], [[TMP2039]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3913]], label [[IF_THEN3915:%.*]], label [[IF_END3916:%.*]] +// SIMD-ONLY0: if.then3915: +// SIMD-ONLY0-NEXT: [[TMP2040:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2040]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3916]] +// 
SIMD-ONLY0: if.end3916: +// SIMD-ONLY0-NEXT: [[TMP2041:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2042:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3917:%.*]] = icmp slt i64 [[TMP2041]], [[TMP2042]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3917]], label [[IF_THEN3919:%.*]], label [[IF_END3920:%.*]] +// SIMD-ONLY0: if.then3919: +// SIMD-ONLY0-NEXT: [[TMP2043:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2043]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3920]] +// SIMD-ONLY0: if.end3920: +// SIMD-ONLY0-NEXT: [[TMP2044:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2045:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3921:%.*]] = icmp eq i64 [[TMP2044]], [[TMP2045]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3921]], label [[COND_TRUE3923:%.*]], label [[COND_FALSE3924:%.*]] +// SIMD-ONLY0: cond.true3923: +// SIMD-ONLY0-NEXT: [[TMP2046:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3925:%.*]] +// SIMD-ONLY0: cond.false3924: +// SIMD-ONLY0-NEXT: [[TMP2047:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3925]] +// SIMD-ONLY0: cond.end3925: +// SIMD-ONLY0-NEXT: [[COND3926:%.*]] = phi i64 [ [[TMP2046]], [[COND_TRUE3923]] ], [ [[TMP2047]], [[COND_FALSE3924]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3926]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2048:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2049:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3927:%.*]] = icmp eq i64 [[TMP2048]], [[TMP2049]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3927]], label [[COND_TRUE3929:%.*]], label [[COND_FALSE3930:%.*]] +// SIMD-ONLY0: cond.true3929: +// SIMD-ONLY0-NEXT: [[TMP2050:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3931:%.*]] +// SIMD-ONLY0: cond.false3930: +// SIMD-ONLY0-NEXT: [[TMP2051:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END3931]] +// SIMD-ONLY0: cond.end3931: +// SIMD-ONLY0-NEXT: [[COND3932:%.*]] = phi i64 [ [[TMP2050]], [[COND_TRUE3929]] ], [ [[TMP2051]], [[COND_FALSE3930]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3932]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2052:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2053:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3933:%.*]] = icmp eq i64 [[TMP2052]], [[TMP2053]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3933]], label [[IF_THEN3935:%.*]], label [[IF_END3936:%.*]] +// SIMD-ONLY0: if.then3935: +// SIMD-ONLY0-NEXT: [[TMP2054:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2054]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3936]] +// SIMD-ONLY0: if.end3936: +// SIMD-ONLY0-NEXT: [[TMP2055:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2056:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3937:%.*]] = icmp eq i64 [[TMP2055]], [[TMP2056]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3937]], label [[IF_THEN3939:%.*]], label [[IF_END3940:%.*]] +// SIMD-ONLY0: if.then3939: +// SIMD-ONLY0-NEXT: [[TMP2057:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2057]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3940]] +// SIMD-ONLY0: if.end3940: +// SIMD-ONLY0-NEXT: [[TMP2058:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2059:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3941:%.*]] = icmp ugt i64 [[TMP2058]], [[TMP2059]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3941]], label [[COND_TRUE3943:%.*]], label [[COND_FALSE3944:%.*]] +// SIMD-ONLY0: cond.true3943: +// SIMD-ONLY0-NEXT: [[TMP2060:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3945:%.*]] +// SIMD-ONLY0: cond.false3944: +// SIMD-ONLY0-NEXT: [[TMP2061:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3945]] +// SIMD-ONLY0: cond.end3945: +// SIMD-ONLY0-NEXT: 
[[COND3946:%.*]] = phi i64 [ [[TMP2060]], [[COND_TRUE3943]] ], [ [[TMP2061]], [[COND_FALSE3944]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3946]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2062:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2063:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3947:%.*]] = icmp ult i64 [[TMP2062]], [[TMP2063]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3947]], label [[COND_TRUE3949:%.*]], label [[COND_FALSE3950:%.*]] +// SIMD-ONLY0: cond.true3949: +// SIMD-ONLY0-NEXT: [[TMP2064:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3951:%.*]] +// SIMD-ONLY0: cond.false3950: +// SIMD-ONLY0-NEXT: [[TMP2065:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3951]] +// SIMD-ONLY0: cond.end3951: +// SIMD-ONLY0-NEXT: [[COND3952:%.*]] = phi i64 [ [[TMP2064]], [[COND_TRUE3949]] ], [ [[TMP2065]], [[COND_FALSE3950]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3952]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2066:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2067:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3953:%.*]] = icmp ugt i64 [[TMP2066]], [[TMP2067]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3953]], label [[COND_TRUE3955:%.*]], label [[COND_FALSE3956:%.*]] +// SIMD-ONLY0: cond.true3955: +// SIMD-ONLY0-NEXT: [[TMP2068:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3957:%.*]] +// SIMD-ONLY0: cond.false3956: +// SIMD-ONLY0-NEXT: [[TMP2069:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3957]] +// SIMD-ONLY0: cond.end3957: +// SIMD-ONLY0-NEXT: [[COND3958:%.*]] = phi i64 [ [[TMP2068]], [[COND_TRUE3955]] ], [ [[TMP2069]], [[COND_FALSE3956]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3958]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2070:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2071:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP3959:%.*]] = icmp ult i64 [[TMP2070]], [[TMP2071]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3959]], label [[COND_TRUE3961:%.*]], label [[COND_FALSE3962:%.*]] +// SIMD-ONLY0: cond.true3961: +// SIMD-ONLY0-NEXT: [[TMP2072:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3963:%.*]] +// SIMD-ONLY0: cond.false3962: +// SIMD-ONLY0-NEXT: [[TMP2073:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3963]] +// SIMD-ONLY0: cond.end3963: +// SIMD-ONLY0-NEXT: [[COND3964:%.*]] = phi i64 [ [[TMP2072]], [[COND_TRUE3961]] ], [ [[TMP2073]], [[COND_FALSE3962]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3964]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2074:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2075:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3965:%.*]] = icmp ugt i64 [[TMP2074]], [[TMP2075]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3965]], label [[IF_THEN3967:%.*]], label [[IF_END3968:%.*]] +// SIMD-ONLY0: if.then3967: +// SIMD-ONLY0-NEXT: [[TMP2076:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2076]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3968]] +// SIMD-ONLY0: if.end3968: +// SIMD-ONLY0-NEXT: [[TMP2077:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2078:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3969:%.*]] = icmp ult i64 [[TMP2077]], [[TMP2078]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3969]], label [[IF_THEN3971:%.*]], label [[IF_END3972:%.*]] +// SIMD-ONLY0: if.then3971: +// SIMD-ONLY0-NEXT: [[TMP2079:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2079]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3972]] +// SIMD-ONLY0: if.end3972: +// SIMD-ONLY0-NEXT: [[TMP2080:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2081:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3973:%.*]] = icmp ugt i64 [[TMP2080]], 
[[TMP2081]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3973]], label [[IF_THEN3975:%.*]], label [[IF_END3976:%.*]] +// SIMD-ONLY0: if.then3975: +// SIMD-ONLY0-NEXT: [[TMP2082:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2082]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3976]] +// SIMD-ONLY0: if.end3976: +// SIMD-ONLY0-NEXT: [[TMP2083:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2084:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3977:%.*]] = icmp ult i64 [[TMP2083]], [[TMP2084]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3977]], label [[IF_THEN3979:%.*]], label [[IF_END3980:%.*]] +// SIMD-ONLY0: if.then3979: +// SIMD-ONLY0-NEXT: [[TMP2085:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2085]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3980]] +// SIMD-ONLY0: if.end3980: +// SIMD-ONLY0-NEXT: [[TMP2086:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2087:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3981:%.*]] = icmp eq i64 [[TMP2086]], [[TMP2087]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3981]], label [[COND_TRUE3983:%.*]], label [[COND_FALSE3984:%.*]] +// SIMD-ONLY0: cond.true3983: +// SIMD-ONLY0-NEXT: [[TMP2088:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3985:%.*]] +// SIMD-ONLY0: cond.false3984: +// SIMD-ONLY0-NEXT: [[TMP2089:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3985]] +// SIMD-ONLY0: cond.end3985: +// SIMD-ONLY0-NEXT: [[COND3986:%.*]] = phi i64 [ [[TMP2088]], [[COND_TRUE3983]] ], [ [[TMP2089]], [[COND_FALSE3984]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3986]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2090:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2091:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3987:%.*]] = icmp eq i64 [[TMP2090]], [[TMP2091]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3987]], label 
[[COND_TRUE3989:%.*]], label [[COND_FALSE3990:%.*]] +// SIMD-ONLY0: cond.true3989: +// SIMD-ONLY0-NEXT: [[TMP2092:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3991:%.*]] +// SIMD-ONLY0: cond.false3990: +// SIMD-ONLY0-NEXT: [[TMP2093:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END3991]] +// SIMD-ONLY0: cond.end3991: +// SIMD-ONLY0-NEXT: [[COND3992:%.*]] = phi i64 [ [[TMP2092]], [[COND_TRUE3989]] ], [ [[TMP2093]], [[COND_FALSE3990]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND3992]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2094:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2095:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3993:%.*]] = icmp eq i64 [[TMP2094]], [[TMP2095]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3993]], label [[IF_THEN3995:%.*]], label [[IF_END3996:%.*]] +// SIMD-ONLY0: if.then3995: +// SIMD-ONLY0-NEXT: [[TMP2096:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2096]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3996]] +// SIMD-ONLY0: if.end3996: +// SIMD-ONLY0-NEXT: [[TMP2097:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2098:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3997:%.*]] = icmp eq i64 [[TMP2097]], [[TMP2098]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3997]], label [[IF_THEN3999:%.*]], label [[IF_END4000:%.*]] +// SIMD-ONLY0: if.then3999: +// SIMD-ONLY0-NEXT: [[TMP2099:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2099]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4000]] +// SIMD-ONLY0: if.end4000: +// SIMD-ONLY0-NEXT: [[TMP2100:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2101:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4001:%.*]] = icmp sgt i64 [[TMP2100]], [[TMP2101]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4001]], label [[COND_TRUE4003:%.*]], label [[COND_FALSE4004:%.*]] +// 
SIMD-ONLY0: cond.true4003: +// SIMD-ONLY0-NEXT: [[TMP2102:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4005:%.*]] +// SIMD-ONLY0: cond.false4004: +// SIMD-ONLY0-NEXT: [[TMP2103:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4005]] +// SIMD-ONLY0: cond.end4005: +// SIMD-ONLY0-NEXT: [[COND4006:%.*]] = phi i64 [ [[TMP2102]], [[COND_TRUE4003]] ], [ [[TMP2103]], [[COND_FALSE4004]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4006]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2104:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2105:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4007:%.*]] = icmp slt i64 [[TMP2104]], [[TMP2105]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4007]], label [[COND_TRUE4009:%.*]], label [[COND_FALSE4010:%.*]] +// SIMD-ONLY0: cond.true4009: +// SIMD-ONLY0-NEXT: [[TMP2106:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4011:%.*]] +// SIMD-ONLY0: cond.false4010: +// SIMD-ONLY0-NEXT: [[TMP2107:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4011]] +// SIMD-ONLY0: cond.end4011: +// SIMD-ONLY0-NEXT: [[COND4012:%.*]] = phi i64 [ [[TMP2106]], [[COND_TRUE4009]] ], [ [[TMP2107]], [[COND_FALSE4010]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4012]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2108:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2109:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4013:%.*]] = icmp sgt i64 [[TMP2108]], [[TMP2109]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4013]], label [[COND_TRUE4015:%.*]], label [[COND_FALSE4016:%.*]] +// SIMD-ONLY0: cond.true4015: +// SIMD-ONLY0-NEXT: [[TMP2110:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4017:%.*]] +// SIMD-ONLY0: cond.false4016: +// SIMD-ONLY0-NEXT: [[TMP2111:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4017]] +// SIMD-ONLY0: cond.end4017: +// 
SIMD-ONLY0-NEXT: [[COND4018:%.*]] = phi i64 [ [[TMP2110]], [[COND_TRUE4015]] ], [ [[TMP2111]], [[COND_FALSE4016]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4018]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2112:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2113:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4019:%.*]] = icmp slt i64 [[TMP2112]], [[TMP2113]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4019]], label [[COND_TRUE4021:%.*]], label [[COND_FALSE4022:%.*]] +// SIMD-ONLY0: cond.true4021: +// SIMD-ONLY0-NEXT: [[TMP2114:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4023:%.*]] +// SIMD-ONLY0: cond.false4022: +// SIMD-ONLY0-NEXT: [[TMP2115:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4023]] +// SIMD-ONLY0: cond.end4023: +// SIMD-ONLY0-NEXT: [[COND4024:%.*]] = phi i64 [ [[TMP2114]], [[COND_TRUE4021]] ], [ [[TMP2115]], [[COND_FALSE4022]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4024]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2116:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2117:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4025:%.*]] = icmp sgt i64 [[TMP2116]], [[TMP2117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4025]], label [[IF_THEN4027:%.*]], label [[IF_END4028:%.*]] +// SIMD-ONLY0: if.then4027: +// SIMD-ONLY0-NEXT: [[TMP2118:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2118]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4028]] +// SIMD-ONLY0: if.end4028: +// SIMD-ONLY0-NEXT: [[TMP2119:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2120:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4029:%.*]] = icmp slt i64 [[TMP2119]], [[TMP2120]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4029]], label [[IF_THEN4031:%.*]], label [[IF_END4032:%.*]] +// SIMD-ONLY0: if.then4031: +// SIMD-ONLY0-NEXT: [[TMP2121:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP2121]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4032]] +// SIMD-ONLY0: if.end4032: +// SIMD-ONLY0-NEXT: [[TMP2122:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2123:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4033:%.*]] = icmp sgt i64 [[TMP2122]], [[TMP2123]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4033]], label [[IF_THEN4035:%.*]], label [[IF_END4036:%.*]] +// SIMD-ONLY0: if.then4035: +// SIMD-ONLY0-NEXT: [[TMP2124:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2124]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4036]] +// SIMD-ONLY0: if.end4036: +// SIMD-ONLY0-NEXT: [[TMP2125:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2126:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4037:%.*]] = icmp slt i64 [[TMP2125]], [[TMP2126]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4037]], label [[IF_THEN4039:%.*]], label [[IF_END4040:%.*]] +// SIMD-ONLY0: if.then4039: +// SIMD-ONLY0-NEXT: [[TMP2127:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2127]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4040]] +// SIMD-ONLY0: if.end4040: +// SIMD-ONLY0-NEXT: [[TMP2128:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2129:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4041:%.*]] = icmp eq i64 [[TMP2128]], [[TMP2129]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4041]], label [[COND_TRUE4043:%.*]], label [[COND_FALSE4044:%.*]] +// SIMD-ONLY0: cond.true4043: +// SIMD-ONLY0-NEXT: [[TMP2130:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4045:%.*]] +// SIMD-ONLY0: cond.false4044: +// SIMD-ONLY0-NEXT: [[TMP2131:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4045]] +// SIMD-ONLY0: cond.end4045: +// SIMD-ONLY0-NEXT: [[COND4046:%.*]] = phi i64 [ [[TMP2130]], [[COND_TRUE4043]] ], [ [[TMP2131]], [[COND_FALSE4044]] ] +// SIMD-ONLY0-NEXT: store i64 
[[COND4046]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2132:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2133:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4047:%.*]] = icmp eq i64 [[TMP2132]], [[TMP2133]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4047]], label [[COND_TRUE4049:%.*]], label [[COND_FALSE4050:%.*]] +// SIMD-ONLY0: cond.true4049: +// SIMD-ONLY0-NEXT: [[TMP2134:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4051:%.*]] +// SIMD-ONLY0: cond.false4050: +// SIMD-ONLY0-NEXT: [[TMP2135:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4051]] +// SIMD-ONLY0: cond.end4051: +// SIMD-ONLY0-NEXT: [[COND4052:%.*]] = phi i64 [ [[TMP2134]], [[COND_TRUE4049]] ], [ [[TMP2135]], [[COND_FALSE4050]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4052]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2136:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2137:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4053:%.*]] = icmp eq i64 [[TMP2136]], [[TMP2137]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4053]], label [[IF_THEN4055:%.*]], label [[IF_END4056:%.*]] +// SIMD-ONLY0: if.then4055: +// SIMD-ONLY0-NEXT: [[TMP2138:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2138]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4056]] +// SIMD-ONLY0: if.end4056: +// SIMD-ONLY0-NEXT: [[TMP2139:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2140:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4057:%.*]] = icmp eq i64 [[TMP2139]], [[TMP2140]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4057]], label [[IF_THEN4059:%.*]], label [[IF_END4060:%.*]] +// SIMD-ONLY0: if.then4059: +// SIMD-ONLY0-NEXT: [[TMP2141:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2141]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4060]] +// SIMD-ONLY0: if.end4060: +// SIMD-ONLY0-NEXT: [[TMP2142:%.*]] = load 
i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2143:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4061:%.*]] = icmp ugt i64 [[TMP2142]], [[TMP2143]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4061]], label [[COND_TRUE4063:%.*]], label [[COND_FALSE4064:%.*]] +// SIMD-ONLY0: cond.true4063: +// SIMD-ONLY0-NEXT: [[TMP2144:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4065:%.*]] +// SIMD-ONLY0: cond.false4064: +// SIMD-ONLY0-NEXT: [[TMP2145:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4065]] +// SIMD-ONLY0: cond.end4065: +// SIMD-ONLY0-NEXT: [[COND4066:%.*]] = phi i64 [ [[TMP2144]], [[COND_TRUE4063]] ], [ [[TMP2145]], [[COND_FALSE4064]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4066]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2146:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2147:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4067:%.*]] = icmp ult i64 [[TMP2146]], [[TMP2147]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4067]], label [[COND_TRUE4069:%.*]], label [[COND_FALSE4070:%.*]] +// SIMD-ONLY0: cond.true4069: +// SIMD-ONLY0-NEXT: [[TMP2148:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4071:%.*]] +// SIMD-ONLY0: cond.false4070: +// SIMD-ONLY0-NEXT: [[TMP2149:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4071]] +// SIMD-ONLY0: cond.end4071: +// SIMD-ONLY0-NEXT: [[COND4072:%.*]] = phi i64 [ [[TMP2148]], [[COND_TRUE4069]] ], [ [[TMP2149]], [[COND_FALSE4070]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4072]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2150:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2151:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4073:%.*]] = icmp ugt i64 [[TMP2150]], [[TMP2151]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4073]], label [[COND_TRUE4075:%.*]], label [[COND_FALSE4076:%.*]] +// SIMD-ONLY0: cond.true4075: +// SIMD-ONLY0-NEXT: 
[[TMP2152:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4077:%.*]] +// SIMD-ONLY0: cond.false4076: +// SIMD-ONLY0-NEXT: [[TMP2153:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4077]] +// SIMD-ONLY0: cond.end4077: +// SIMD-ONLY0-NEXT: [[COND4078:%.*]] = phi i64 [ [[TMP2152]], [[COND_TRUE4075]] ], [ [[TMP2153]], [[COND_FALSE4076]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4078]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2154:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2155:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4079:%.*]] = icmp ult i64 [[TMP2154]], [[TMP2155]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4079]], label [[COND_TRUE4081:%.*]], label [[COND_FALSE4082:%.*]] +// SIMD-ONLY0: cond.true4081: +// SIMD-ONLY0-NEXT: [[TMP2156:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4083:%.*]] +// SIMD-ONLY0: cond.false4082: +// SIMD-ONLY0-NEXT: [[TMP2157:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4083]] +// SIMD-ONLY0: cond.end4083: +// SIMD-ONLY0-NEXT: [[COND4084:%.*]] = phi i64 [ [[TMP2156]], [[COND_TRUE4081]] ], [ [[TMP2157]], [[COND_FALSE4082]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4084]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2158:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2159:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4085:%.*]] = icmp ugt i64 [[TMP2158]], [[TMP2159]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4085]], label [[IF_THEN4087:%.*]], label [[IF_END4088:%.*]] +// SIMD-ONLY0: if.then4087: +// SIMD-ONLY0-NEXT: [[TMP2160:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2160]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4088]] +// SIMD-ONLY0: if.end4088: +// SIMD-ONLY0-NEXT: [[TMP2161:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2162:%.*]] = load i64, ptr [[ULLE]], align 8 
+// SIMD-ONLY0-NEXT: [[CMP4089:%.*]] = icmp ult i64 [[TMP2161]], [[TMP2162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4089]], label [[IF_THEN4091:%.*]], label [[IF_END4092:%.*]] +// SIMD-ONLY0: if.then4091: +// SIMD-ONLY0-NEXT: [[TMP2163:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2163]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4092]] +// SIMD-ONLY0: if.end4092: +// SIMD-ONLY0-NEXT: [[TMP2164:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2165:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4093:%.*]] = icmp ugt i64 [[TMP2164]], [[TMP2165]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4093]], label [[IF_THEN4095:%.*]], label [[IF_END4096:%.*]] +// SIMD-ONLY0: if.then4095: +// SIMD-ONLY0-NEXT: [[TMP2166:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2166]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4096]] +// SIMD-ONLY0: if.end4096: +// SIMD-ONLY0-NEXT: [[TMP2167:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2168:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4097:%.*]] = icmp ult i64 [[TMP2167]], [[TMP2168]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4097]], label [[IF_THEN4099:%.*]], label [[IF_END4100:%.*]] +// SIMD-ONLY0: if.then4099: +// SIMD-ONLY0-NEXT: [[TMP2169:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2169]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4100]] +// SIMD-ONLY0: if.end4100: +// SIMD-ONLY0-NEXT: [[TMP2170:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2171:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4101:%.*]] = icmp eq i64 [[TMP2170]], [[TMP2171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4101]], label [[COND_TRUE4103:%.*]], label [[COND_FALSE4104:%.*]] +// SIMD-ONLY0: cond.true4103: +// SIMD-ONLY0-NEXT: [[TMP2172:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4105:%.*]] +// SIMD-ONLY0: 
cond.false4104: +// SIMD-ONLY0-NEXT: [[TMP2173:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4105]] +// SIMD-ONLY0: cond.end4105: +// SIMD-ONLY0-NEXT: [[COND4106:%.*]] = phi i64 [ [[TMP2172]], [[COND_TRUE4103]] ], [ [[TMP2173]], [[COND_FALSE4104]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4106]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2174:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2175:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4107:%.*]] = icmp eq i64 [[TMP2174]], [[TMP2175]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4107]], label [[COND_TRUE4109:%.*]], label [[COND_FALSE4110:%.*]] +// SIMD-ONLY0: cond.true4109: +// SIMD-ONLY0-NEXT: [[TMP2176:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4111:%.*]] +// SIMD-ONLY0: cond.false4110: +// SIMD-ONLY0-NEXT: [[TMP2177:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4111]] +// SIMD-ONLY0: cond.end4111: +// SIMD-ONLY0-NEXT: [[COND4112:%.*]] = phi i64 [ [[TMP2176]], [[COND_TRUE4109]] ], [ [[TMP2177]], [[COND_FALSE4110]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4112]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2178:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2179:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4113:%.*]] = icmp eq i64 [[TMP2178]], [[TMP2179]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4113]], label [[IF_THEN4115:%.*]], label [[IF_END4116:%.*]] +// SIMD-ONLY0: if.then4115: +// SIMD-ONLY0-NEXT: [[TMP2180:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2180]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4116]] +// SIMD-ONLY0: if.end4116: +// SIMD-ONLY0-NEXT: [[TMP2181:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2182:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4117:%.*]] = icmp eq i64 [[TMP2181]], [[TMP2182]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4117]], label 
[[IF_THEN4119:%.*]], label [[IF_END4120:%.*]] +// SIMD-ONLY0: if.then4119: +// SIMD-ONLY0-NEXT: [[TMP2183:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2183]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4120]] +// SIMD-ONLY0: if.end4120: +// SIMD-ONLY0-NEXT: [[TMP2184:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2185:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4121:%.*]] = icmp sgt i64 [[TMP2184]], [[TMP2185]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4121]], label [[COND_TRUE4123:%.*]], label [[COND_FALSE4124:%.*]] +// SIMD-ONLY0: cond.true4123: +// SIMD-ONLY0-NEXT: [[TMP2186:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4125:%.*]] +// SIMD-ONLY0: cond.false4124: +// SIMD-ONLY0-NEXT: [[TMP2187:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4125]] +// SIMD-ONLY0: cond.end4125: +// SIMD-ONLY0-NEXT: [[COND4126:%.*]] = phi i64 [ [[TMP2186]], [[COND_TRUE4123]] ], [ [[TMP2187]], [[COND_FALSE4124]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4126]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2188:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2189:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4127:%.*]] = icmp slt i64 [[TMP2188]], [[TMP2189]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4127]], label [[COND_TRUE4129:%.*]], label [[COND_FALSE4130:%.*]] +// SIMD-ONLY0: cond.true4129: +// SIMD-ONLY0-NEXT: [[TMP2190:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4131:%.*]] +// SIMD-ONLY0: cond.false4130: +// SIMD-ONLY0-NEXT: [[TMP2191:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4131]] +// SIMD-ONLY0: cond.end4131: +// SIMD-ONLY0-NEXT: [[COND4132:%.*]] = phi i64 [ [[TMP2190]], [[COND_TRUE4129]] ], [ [[TMP2191]], [[COND_FALSE4130]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4132]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2192:%.*]] = load i64, 
ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2193:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4133:%.*]] = icmp sgt i64 [[TMP2192]], [[TMP2193]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4133]], label [[COND_TRUE4135:%.*]], label [[COND_FALSE4136:%.*]] +// SIMD-ONLY0: cond.true4135: +// SIMD-ONLY0-NEXT: [[TMP2194:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4137:%.*]] +// SIMD-ONLY0: cond.false4136: +// SIMD-ONLY0-NEXT: [[TMP2195:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4137]] +// SIMD-ONLY0: cond.end4137: +// SIMD-ONLY0-NEXT: [[COND4138:%.*]] = phi i64 [ [[TMP2194]], [[COND_TRUE4135]] ], [ [[TMP2195]], [[COND_FALSE4136]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4138]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2196:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2197:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4139:%.*]] = icmp slt i64 [[TMP2196]], [[TMP2197]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4139]], label [[COND_TRUE4141:%.*]], label [[COND_FALSE4142:%.*]] +// SIMD-ONLY0: cond.true4141: +// SIMD-ONLY0-NEXT: [[TMP2198:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4143:%.*]] +// SIMD-ONLY0: cond.false4142: +// SIMD-ONLY0-NEXT: [[TMP2199:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4143]] +// SIMD-ONLY0: cond.end4143: +// SIMD-ONLY0-NEXT: [[COND4144:%.*]] = phi i64 [ [[TMP2198]], [[COND_TRUE4141]] ], [ [[TMP2199]], [[COND_FALSE4142]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4144]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2200:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2201:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4145:%.*]] = icmp sgt i64 [[TMP2200]], [[TMP2201]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4145]], label [[IF_THEN4147:%.*]], label [[IF_END4148:%.*]] +// SIMD-ONLY0: if.then4147: +// SIMD-ONLY0-NEXT: [[TMP2202:%.*]] = load 
i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2202]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4148]] +// SIMD-ONLY0: if.end4148: +// SIMD-ONLY0-NEXT: [[TMP2203:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2204:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4149:%.*]] = icmp slt i64 [[TMP2203]], [[TMP2204]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4149]], label [[IF_THEN4151:%.*]], label [[IF_END4152:%.*]] +// SIMD-ONLY0: if.then4151: +// SIMD-ONLY0-NEXT: [[TMP2205:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2205]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4152]] +// SIMD-ONLY0: if.end4152: +// SIMD-ONLY0-NEXT: [[TMP2206:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2207:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4153:%.*]] = icmp sgt i64 [[TMP2206]], [[TMP2207]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4153]], label [[IF_THEN4155:%.*]], label [[IF_END4156:%.*]] +// SIMD-ONLY0: if.then4155: +// SIMD-ONLY0-NEXT: [[TMP2208:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2208]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4156]] +// SIMD-ONLY0: if.end4156: +// SIMD-ONLY0-NEXT: [[TMP2209:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2210:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4157:%.*]] = icmp slt i64 [[TMP2209]], [[TMP2210]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4157]], label [[IF_THEN4159:%.*]], label [[IF_END4160:%.*]] +// SIMD-ONLY0: if.then4159: +// SIMD-ONLY0-NEXT: [[TMP2211:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2211]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4160]] +// SIMD-ONLY0: if.end4160: +// SIMD-ONLY0-NEXT: [[TMP2212:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2213:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4161:%.*]] = icmp eq i64 
[[TMP2212]], [[TMP2213]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4161]], label [[COND_TRUE4163:%.*]], label [[COND_FALSE4164:%.*]] +// SIMD-ONLY0: cond.true4163: +// SIMD-ONLY0-NEXT: [[TMP2214:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4165:%.*]] +// SIMD-ONLY0: cond.false4164: +// SIMD-ONLY0-NEXT: [[TMP2215:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4165]] +// SIMD-ONLY0: cond.end4165: +// SIMD-ONLY0-NEXT: [[COND4166:%.*]] = phi i64 [ [[TMP2214]], [[COND_TRUE4163]] ], [ [[TMP2215]], [[COND_FALSE4164]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4166]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2216:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2217:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4167:%.*]] = icmp eq i64 [[TMP2216]], [[TMP2217]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4167]], label [[COND_TRUE4169:%.*]], label [[COND_FALSE4170:%.*]] +// SIMD-ONLY0: cond.true4169: +// SIMD-ONLY0-NEXT: [[TMP2218:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4171:%.*]] +// SIMD-ONLY0: cond.false4170: +// SIMD-ONLY0-NEXT: [[TMP2219:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4171]] +// SIMD-ONLY0: cond.end4171: +// SIMD-ONLY0-NEXT: [[COND4172:%.*]] = phi i64 [ [[TMP2218]], [[COND_TRUE4169]] ], [ [[TMP2219]], [[COND_FALSE4170]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4172]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2220:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2221:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4173:%.*]] = icmp eq i64 [[TMP2220]], [[TMP2221]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4173]], label [[IF_THEN4175:%.*]], label [[IF_END4176:%.*]] +// SIMD-ONLY0: if.then4175: +// SIMD-ONLY0-NEXT: [[TMP2222:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2222]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4176]] +// 
SIMD-ONLY0: if.end4176: +// SIMD-ONLY0-NEXT: [[TMP2223:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2224:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4177:%.*]] = icmp eq i64 [[TMP2223]], [[TMP2224]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4177]], label [[IF_THEN4179:%.*]], label [[IF_END4180:%.*]] +// SIMD-ONLY0: if.then4179: +// SIMD-ONLY0-NEXT: [[TMP2225:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2225]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4180]] +// SIMD-ONLY0: if.end4180: +// SIMD-ONLY0-NEXT: [[TMP2226:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2227:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4181:%.*]] = icmp ugt i64 [[TMP2226]], [[TMP2227]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4181]], label [[COND_TRUE4183:%.*]], label [[COND_FALSE4184:%.*]] +// SIMD-ONLY0: cond.true4183: +// SIMD-ONLY0-NEXT: [[TMP2228:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4185:%.*]] +// SIMD-ONLY0: cond.false4184: +// SIMD-ONLY0-NEXT: [[TMP2229:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4185]] +// SIMD-ONLY0: cond.end4185: +// SIMD-ONLY0-NEXT: [[COND4186:%.*]] = phi i64 [ [[TMP2228]], [[COND_TRUE4183]] ], [ [[TMP2229]], [[COND_FALSE4184]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4186]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2230:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2231:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4187:%.*]] = icmp ult i64 [[TMP2230]], [[TMP2231]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4187]], label [[COND_TRUE4189:%.*]], label [[COND_FALSE4190:%.*]] +// SIMD-ONLY0: cond.true4189: +// SIMD-ONLY0-NEXT: [[TMP2232:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4191:%.*]] +// SIMD-ONLY0: cond.false4190: +// SIMD-ONLY0-NEXT: [[TMP2233:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: 
br label [[COND_END4191]] +// SIMD-ONLY0: cond.end4191: +// SIMD-ONLY0-NEXT: [[COND4192:%.*]] = phi i64 [ [[TMP2232]], [[COND_TRUE4189]] ], [ [[TMP2233]], [[COND_FALSE4190]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4192]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2234:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2235:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4193:%.*]] = icmp ugt i64 [[TMP2234]], [[TMP2235]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4193]], label [[COND_TRUE4195:%.*]], label [[COND_FALSE4196:%.*]] +// SIMD-ONLY0: cond.true4195: +// SIMD-ONLY0-NEXT: [[TMP2236:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4197:%.*]] +// SIMD-ONLY0: cond.false4196: +// SIMD-ONLY0-NEXT: [[TMP2237:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4197]] +// SIMD-ONLY0: cond.end4197: +// SIMD-ONLY0-NEXT: [[COND4198:%.*]] = phi i64 [ [[TMP2236]], [[COND_TRUE4195]] ], [ [[TMP2237]], [[COND_FALSE4196]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4198]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2238:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2239:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4199:%.*]] = icmp ult i64 [[TMP2238]], [[TMP2239]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4199]], label [[COND_TRUE4201:%.*]], label [[COND_FALSE4202:%.*]] +// SIMD-ONLY0: cond.true4201: +// SIMD-ONLY0-NEXT: [[TMP2240:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4203:%.*]] +// SIMD-ONLY0: cond.false4202: +// SIMD-ONLY0-NEXT: [[TMP2241:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4203]] +// SIMD-ONLY0: cond.end4203: +// SIMD-ONLY0-NEXT: [[COND4204:%.*]] = phi i64 [ [[TMP2240]], [[COND_TRUE4201]] ], [ [[TMP2241]], [[COND_FALSE4202]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4204]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2242:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP2243:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4205:%.*]] = icmp ugt i64 [[TMP2242]], [[TMP2243]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4205]], label [[IF_THEN4207:%.*]], label [[IF_END4208:%.*]] +// SIMD-ONLY0: if.then4207: +// SIMD-ONLY0-NEXT: [[TMP2244:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2244]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4208]] +// SIMD-ONLY0: if.end4208: +// SIMD-ONLY0-NEXT: [[TMP2245:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2246:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4209:%.*]] = icmp ult i64 [[TMP2245]], [[TMP2246]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4209]], label [[IF_THEN4211:%.*]], label [[IF_END4212:%.*]] +// SIMD-ONLY0: if.then4211: +// SIMD-ONLY0-NEXT: [[TMP2247:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2247]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4212]] +// SIMD-ONLY0: if.end4212: +// SIMD-ONLY0-NEXT: [[TMP2248:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2249:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4213:%.*]] = icmp ugt i64 [[TMP2248]], [[TMP2249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4213]], label [[IF_THEN4215:%.*]], label [[IF_END4216:%.*]] +// SIMD-ONLY0: if.then4215: +// SIMD-ONLY0-NEXT: [[TMP2250:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2250]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4216]] +// SIMD-ONLY0: if.end4216: +// SIMD-ONLY0-NEXT: [[TMP2251:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2252:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4217:%.*]] = icmp ult i64 [[TMP2251]], [[TMP2252]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4217]], label [[IF_THEN4219:%.*]], label [[IF_END4220:%.*]] +// SIMD-ONLY0: if.then4219: +// SIMD-ONLY0-NEXT: [[TMP2253:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP2253]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4220]] +// SIMD-ONLY0: if.end4220: +// SIMD-ONLY0-NEXT: [[TMP2254:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2255:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4221:%.*]] = icmp eq i64 [[TMP2254]], [[TMP2255]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4221]], label [[COND_TRUE4223:%.*]], label [[COND_FALSE4224:%.*]] +// SIMD-ONLY0: cond.true4223: +// SIMD-ONLY0-NEXT: [[TMP2256:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4225:%.*]] +// SIMD-ONLY0: cond.false4224: +// SIMD-ONLY0-NEXT: [[TMP2257:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4225]] +// SIMD-ONLY0: cond.end4225: +// SIMD-ONLY0-NEXT: [[COND4226:%.*]] = phi i64 [ [[TMP2256]], [[COND_TRUE4223]] ], [ [[TMP2257]], [[COND_FALSE4224]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4226]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2258:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2259:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4227:%.*]] = icmp eq i64 [[TMP2258]], [[TMP2259]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4227]], label [[COND_TRUE4229:%.*]], label [[COND_FALSE4230:%.*]] +// SIMD-ONLY0: cond.true4229: +// SIMD-ONLY0-NEXT: [[TMP2260:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4231:%.*]] +// SIMD-ONLY0: cond.false4230: +// SIMD-ONLY0-NEXT: [[TMP2261:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4231]] +// SIMD-ONLY0: cond.end4231: +// SIMD-ONLY0-NEXT: [[COND4232:%.*]] = phi i64 [ [[TMP2260]], [[COND_TRUE4229]] ], [ [[TMP2261]], [[COND_FALSE4230]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4232]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2262:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2263:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4233:%.*]] = icmp eq i64 
[[TMP2262]], [[TMP2263]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4233]], label [[IF_THEN4235:%.*]], label [[IF_END4236:%.*]] +// SIMD-ONLY0: if.then4235: +// SIMD-ONLY0-NEXT: [[TMP2264:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2264]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4236]] +// SIMD-ONLY0: if.end4236: +// SIMD-ONLY0-NEXT: [[TMP2265:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2266:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4237:%.*]] = icmp eq i64 [[TMP2265]], [[TMP2266]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4237]], label [[IF_THEN4239:%.*]], label [[IF_END4240:%.*]] +// SIMD-ONLY0: if.then4239: +// SIMD-ONLY0-NEXT: [[TMP2267:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2267]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4240]] +// SIMD-ONLY0: if.end4240: +// SIMD-ONLY0-NEXT: [[TMP2268:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2269:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4241:%.*]] = icmp sgt i64 [[TMP2268]], [[TMP2269]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4241]], label [[COND_TRUE4243:%.*]], label [[COND_FALSE4244:%.*]] +// SIMD-ONLY0: cond.true4243: +// SIMD-ONLY0-NEXT: [[TMP2270:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4245:%.*]] +// SIMD-ONLY0: cond.false4244: +// SIMD-ONLY0-NEXT: [[TMP2271:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4245]] +// SIMD-ONLY0: cond.end4245: +// SIMD-ONLY0-NEXT: [[COND4246:%.*]] = phi i64 [ [[TMP2270]], [[COND_TRUE4243]] ], [ [[TMP2271]], [[COND_FALSE4244]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4246]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2272:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2273:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4247:%.*]] = icmp slt i64 [[TMP2272]], [[TMP2273]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4247]], label 
[[COND_TRUE4249:%.*]], label [[COND_FALSE4250:%.*]] +// SIMD-ONLY0: cond.true4249: +// SIMD-ONLY0-NEXT: [[TMP2274:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4251:%.*]] +// SIMD-ONLY0: cond.false4250: +// SIMD-ONLY0-NEXT: [[TMP2275:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4251]] +// SIMD-ONLY0: cond.end4251: +// SIMD-ONLY0-NEXT: [[COND4252:%.*]] = phi i64 [ [[TMP2274]], [[COND_TRUE4249]] ], [ [[TMP2275]], [[COND_FALSE4250]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4252]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2276:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2277:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4253:%.*]] = icmp sgt i64 [[TMP2276]], [[TMP2277]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4253]], label [[COND_TRUE4255:%.*]], label [[COND_FALSE4256:%.*]] +// SIMD-ONLY0: cond.true4255: +// SIMD-ONLY0-NEXT: [[TMP2278:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4257:%.*]] +// SIMD-ONLY0: cond.false4256: +// SIMD-ONLY0-NEXT: [[TMP2279:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4257]] +// SIMD-ONLY0: cond.end4257: +// SIMD-ONLY0-NEXT: [[COND4258:%.*]] = phi i64 [ [[TMP2278]], [[COND_TRUE4255]] ], [ [[TMP2279]], [[COND_FALSE4256]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4258]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2280:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2281:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4259:%.*]] = icmp slt i64 [[TMP2280]], [[TMP2281]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4259]], label [[COND_TRUE4261:%.*]], label [[COND_FALSE4262:%.*]] +// SIMD-ONLY0: cond.true4261: +// SIMD-ONLY0-NEXT: [[TMP2282:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4263:%.*]] +// SIMD-ONLY0: cond.false4262: +// SIMD-ONLY0-NEXT: [[TMP2283:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END4263]] +// SIMD-ONLY0: cond.end4263: +// SIMD-ONLY0-NEXT: [[COND4264:%.*]] = phi i64 [ [[TMP2282]], [[COND_TRUE4261]] ], [ [[TMP2283]], [[COND_FALSE4262]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4264]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2284:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2285:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4265:%.*]] = icmp sgt i64 [[TMP2284]], [[TMP2285]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4265]], label [[IF_THEN4267:%.*]], label [[IF_END4268:%.*]] +// SIMD-ONLY0: if.then4267: +// SIMD-ONLY0-NEXT: [[TMP2286:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2286]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4268]] +// SIMD-ONLY0: if.end4268: +// SIMD-ONLY0-NEXT: [[TMP2287:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2288:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4269:%.*]] = icmp slt i64 [[TMP2287]], [[TMP2288]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4269]], label [[IF_THEN4271:%.*]], label [[IF_END4272:%.*]] +// SIMD-ONLY0: if.then4271: +// SIMD-ONLY0-NEXT: [[TMP2289:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2289]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4272]] +// SIMD-ONLY0: if.end4272: +// SIMD-ONLY0-NEXT: [[TMP2290:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2291:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4273:%.*]] = icmp sgt i64 [[TMP2290]], [[TMP2291]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4273]], label [[IF_THEN4275:%.*]], label [[IF_END4276:%.*]] +// SIMD-ONLY0: if.then4275: +// SIMD-ONLY0-NEXT: [[TMP2292:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2292]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4276]] +// SIMD-ONLY0: if.end4276: +// SIMD-ONLY0-NEXT: [[TMP2293:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2294:%.*]] = load i64, ptr 
[[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4277:%.*]] = icmp slt i64 [[TMP2293]], [[TMP2294]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4277]], label [[IF_THEN4279:%.*]], label [[IF_END4280:%.*]] +// SIMD-ONLY0: if.then4279: +// SIMD-ONLY0-NEXT: [[TMP2295:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2295]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4280]] +// SIMD-ONLY0: if.end4280: +// SIMD-ONLY0-NEXT: [[TMP2296:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2297:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4281:%.*]] = icmp eq i64 [[TMP2296]], [[TMP2297]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4281]], label [[COND_TRUE4283:%.*]], label [[COND_FALSE4284:%.*]] +// SIMD-ONLY0: cond.true4283: +// SIMD-ONLY0-NEXT: [[TMP2298:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4285:%.*]] +// SIMD-ONLY0: cond.false4284: +// SIMD-ONLY0-NEXT: [[TMP2299:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4285]] +// SIMD-ONLY0: cond.end4285: +// SIMD-ONLY0-NEXT: [[COND4286:%.*]] = phi i64 [ [[TMP2298]], [[COND_TRUE4283]] ], [ [[TMP2299]], [[COND_FALSE4284]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4286]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2300:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2301:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4287:%.*]] = icmp eq i64 [[TMP2300]], [[TMP2301]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4287]], label [[COND_TRUE4289:%.*]], label [[COND_FALSE4290:%.*]] +// SIMD-ONLY0: cond.true4289: +// SIMD-ONLY0-NEXT: [[TMP2302:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4291:%.*]] +// SIMD-ONLY0: cond.false4290: +// SIMD-ONLY0-NEXT: [[TMP2303:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4291]] +// SIMD-ONLY0: cond.end4291: +// SIMD-ONLY0-NEXT: [[COND4292:%.*]] = phi i64 [ [[TMP2302]], [[COND_TRUE4289]] ], [ [[TMP2303]], 
[[COND_FALSE4290]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4292]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2304:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2305:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4293:%.*]] = icmp eq i64 [[TMP2304]], [[TMP2305]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4293]], label [[IF_THEN4295:%.*]], label [[IF_END4296:%.*]] +// SIMD-ONLY0: if.then4295: +// SIMD-ONLY0-NEXT: [[TMP2306:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2306]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4296]] +// SIMD-ONLY0: if.end4296: +// SIMD-ONLY0-NEXT: [[TMP2307:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2308:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4297:%.*]] = icmp eq i64 [[TMP2307]], [[TMP2308]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4297]], label [[IF_THEN4299:%.*]], label [[IF_END4300:%.*]] +// SIMD-ONLY0: if.then4299: +// SIMD-ONLY0-NEXT: [[TMP2309:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2309]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4300]] +// SIMD-ONLY0: if.end4300: +// SIMD-ONLY0-NEXT: [[TMP2310:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2311:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4301:%.*]] = icmp ugt i64 [[TMP2310]], [[TMP2311]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4301]], label [[COND_TRUE4303:%.*]], label [[COND_FALSE4304:%.*]] +// SIMD-ONLY0: cond.true4303: +// SIMD-ONLY0-NEXT: [[TMP2312:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4305:%.*]] +// SIMD-ONLY0: cond.false4304: +// SIMD-ONLY0-NEXT: [[TMP2313:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4305]] +// SIMD-ONLY0: cond.end4305: +// SIMD-ONLY0-NEXT: [[COND4306:%.*]] = phi i64 [ [[TMP2312]], [[COND_TRUE4303]] ], [ [[TMP2313]], [[COND_FALSE4304]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4306]], ptr 
[[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2314:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2315:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4307:%.*]] = icmp ult i64 [[TMP2314]], [[TMP2315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4307]], label [[COND_TRUE4309:%.*]], label [[COND_FALSE4310:%.*]] +// SIMD-ONLY0: cond.true4309: +// SIMD-ONLY0-NEXT: [[TMP2316:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4311:%.*]] +// SIMD-ONLY0: cond.false4310: +// SIMD-ONLY0-NEXT: [[TMP2317:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4311]] +// SIMD-ONLY0: cond.end4311: +// SIMD-ONLY0-NEXT: [[COND4312:%.*]] = phi i64 [ [[TMP2316]], [[COND_TRUE4309]] ], [ [[TMP2317]], [[COND_FALSE4310]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4312]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2318:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2319:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4313:%.*]] = icmp ugt i64 [[TMP2318]], [[TMP2319]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4313]], label [[COND_TRUE4315:%.*]], label [[COND_FALSE4316:%.*]] +// SIMD-ONLY0: cond.true4315: +// SIMD-ONLY0-NEXT: [[TMP2320:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4317:%.*]] +// SIMD-ONLY0: cond.false4316: +// SIMD-ONLY0-NEXT: [[TMP2321:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4317]] +// SIMD-ONLY0: cond.end4317: +// SIMD-ONLY0-NEXT: [[COND4318:%.*]] = phi i64 [ [[TMP2320]], [[COND_TRUE4315]] ], [ [[TMP2321]], [[COND_FALSE4316]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4318]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2322:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2323:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4319:%.*]] = icmp ult i64 [[TMP2322]], [[TMP2323]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4319]], label [[COND_TRUE4321:%.*]], label 
[[COND_FALSE4322:%.*]] +// SIMD-ONLY0: cond.true4321: +// SIMD-ONLY0-NEXT: [[TMP2324:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4323:%.*]] +// SIMD-ONLY0: cond.false4322: +// SIMD-ONLY0-NEXT: [[TMP2325:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4323]] +// SIMD-ONLY0: cond.end4323: +// SIMD-ONLY0-NEXT: [[COND4324:%.*]] = phi i64 [ [[TMP2324]], [[COND_TRUE4321]] ], [ [[TMP2325]], [[COND_FALSE4322]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4324]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2326:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2327:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4325:%.*]] = icmp ugt i64 [[TMP2326]], [[TMP2327]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4325]], label [[IF_THEN4327:%.*]], label [[IF_END4328:%.*]] +// SIMD-ONLY0: if.then4327: +// SIMD-ONLY0-NEXT: [[TMP2328:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2328]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4328]] +// SIMD-ONLY0: if.end4328: +// SIMD-ONLY0-NEXT: [[TMP2329:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2330:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4329:%.*]] = icmp ult i64 [[TMP2329]], [[TMP2330]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4329]], label [[IF_THEN4331:%.*]], label [[IF_END4332:%.*]] +// SIMD-ONLY0: if.then4331: +// SIMD-ONLY0-NEXT: [[TMP2331:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2331]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4332]] +// SIMD-ONLY0: if.end4332: +// SIMD-ONLY0-NEXT: [[TMP2332:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2333:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4333:%.*]] = icmp ugt i64 [[TMP2332]], [[TMP2333]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4333]], label [[IF_THEN4335:%.*]], label [[IF_END4336:%.*]] +// SIMD-ONLY0: if.then4335: +// 
SIMD-ONLY0-NEXT: [[TMP2334:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2334]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4336]] +// SIMD-ONLY0: if.end4336: +// SIMD-ONLY0-NEXT: [[TMP2335:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2336:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4337:%.*]] = icmp ult i64 [[TMP2335]], [[TMP2336]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4337]], label [[IF_THEN4339:%.*]], label [[IF_END4340:%.*]] +// SIMD-ONLY0: if.then4339: +// SIMD-ONLY0-NEXT: [[TMP2337:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2337]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4340]] +// SIMD-ONLY0: if.end4340: +// SIMD-ONLY0-NEXT: [[TMP2338:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2339:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4341:%.*]] = icmp eq i64 [[TMP2338]], [[TMP2339]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4341]], label [[COND_TRUE4343:%.*]], label [[COND_FALSE4344:%.*]] +// SIMD-ONLY0: cond.true4343: +// SIMD-ONLY0-NEXT: [[TMP2340:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4345:%.*]] +// SIMD-ONLY0: cond.false4344: +// SIMD-ONLY0-NEXT: [[TMP2341:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4345]] +// SIMD-ONLY0: cond.end4345: +// SIMD-ONLY0-NEXT: [[COND4346:%.*]] = phi i64 [ [[TMP2340]], [[COND_TRUE4343]] ], [ [[TMP2341]], [[COND_FALSE4344]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4346]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2342:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2343:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4347:%.*]] = icmp eq i64 [[TMP2342]], [[TMP2343]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4347]], label [[COND_TRUE4349:%.*]], label [[COND_FALSE4350:%.*]] +// SIMD-ONLY0: cond.true4349: +// SIMD-ONLY0-NEXT: [[TMP2344:%.*]] = load i64, ptr 
[[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4351:%.*]] +// SIMD-ONLY0: cond.false4350: +// SIMD-ONLY0-NEXT: [[TMP2345:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4351]] +// SIMD-ONLY0: cond.end4351: +// SIMD-ONLY0-NEXT: [[COND4352:%.*]] = phi i64 [ [[TMP2344]], [[COND_TRUE4349]] ], [ [[TMP2345]], [[COND_FALSE4350]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4352]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2346:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2347:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4353:%.*]] = icmp eq i64 [[TMP2346]], [[TMP2347]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4353]], label [[IF_THEN4355:%.*]], label [[IF_END4356:%.*]] +// SIMD-ONLY0: if.then4355: +// SIMD-ONLY0-NEXT: [[TMP2348:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2348]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4356]] +// SIMD-ONLY0: if.end4356: +// SIMD-ONLY0-NEXT: [[TMP2349:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2350:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4357:%.*]] = icmp eq i64 [[TMP2349]], [[TMP2350]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4357]], label [[IF_THEN4359:%.*]], label [[IF_END4360:%.*]] +// SIMD-ONLY0: if.then4359: +// SIMD-ONLY0-NEXT: [[TMP2351:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2351]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4360]] +// SIMD-ONLY0: if.end4360: +// SIMD-ONLY0-NEXT: [[TMP2352:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2353:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4361:%.*]] = icmp sgt i64 [[TMP2352]], [[TMP2353]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4361]], label [[COND_TRUE4363:%.*]], label [[COND_FALSE4364:%.*]] +// SIMD-ONLY0: cond.true4363: +// SIMD-ONLY0-NEXT: [[TMP2354:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END4365:%.*]] +// SIMD-ONLY0: cond.false4364: +// SIMD-ONLY0-NEXT: [[TMP2355:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4365]] +// SIMD-ONLY0: cond.end4365: +// SIMD-ONLY0-NEXT: [[COND4366:%.*]] = phi i64 [ [[TMP2354]], [[COND_TRUE4363]] ], [ [[TMP2355]], [[COND_FALSE4364]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4366]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2356:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2357:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4367:%.*]] = icmp slt i64 [[TMP2356]], [[TMP2357]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4367]], label [[COND_TRUE4369:%.*]], label [[COND_FALSE4370:%.*]] +// SIMD-ONLY0: cond.true4369: +// SIMD-ONLY0-NEXT: [[TMP2358:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4371:%.*]] +// SIMD-ONLY0: cond.false4370: +// SIMD-ONLY0-NEXT: [[TMP2359:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4371]] +// SIMD-ONLY0: cond.end4371: +// SIMD-ONLY0-NEXT: [[COND4372:%.*]] = phi i64 [ [[TMP2358]], [[COND_TRUE4369]] ], [ [[TMP2359]], [[COND_FALSE4370]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4372]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2360:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2361:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4373:%.*]] = icmp sgt i64 [[TMP2360]], [[TMP2361]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4373]], label [[COND_TRUE4375:%.*]], label [[COND_FALSE4376:%.*]] +// SIMD-ONLY0: cond.true4375: +// SIMD-ONLY0-NEXT: [[TMP2362:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4377:%.*]] +// SIMD-ONLY0: cond.false4376: +// SIMD-ONLY0-NEXT: [[TMP2363:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4377]] +// SIMD-ONLY0: cond.end4377: +// SIMD-ONLY0-NEXT: [[COND4378:%.*]] = phi i64 [ [[TMP2362]], [[COND_TRUE4375]] ], [ [[TMP2363]], [[COND_FALSE4376]] ] +// 
SIMD-ONLY0-NEXT: store i64 [[COND4378]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2364:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2365:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4379:%.*]] = icmp slt i64 [[TMP2364]], [[TMP2365]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4379]], label [[COND_TRUE4381:%.*]], label [[COND_FALSE4382:%.*]] +// SIMD-ONLY0: cond.true4381: +// SIMD-ONLY0-NEXT: [[TMP2366:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4383:%.*]] +// SIMD-ONLY0: cond.false4382: +// SIMD-ONLY0-NEXT: [[TMP2367:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4383]] +// SIMD-ONLY0: cond.end4383: +// SIMD-ONLY0-NEXT: [[COND4384:%.*]] = phi i64 [ [[TMP2366]], [[COND_TRUE4381]] ], [ [[TMP2367]], [[COND_FALSE4382]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4384]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2368:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2369:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4385:%.*]] = icmp sgt i64 [[TMP2368]], [[TMP2369]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4385]], label [[IF_THEN4387:%.*]], label [[IF_END4388:%.*]] +// SIMD-ONLY0: if.then4387: +// SIMD-ONLY0-NEXT: [[TMP2370:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2370]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4388]] +// SIMD-ONLY0: if.end4388: +// SIMD-ONLY0-NEXT: [[TMP2371:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2372:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4389:%.*]] = icmp slt i64 [[TMP2371]], [[TMP2372]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4389]], label [[IF_THEN4391:%.*]], label [[IF_END4392:%.*]] +// SIMD-ONLY0: if.then4391: +// SIMD-ONLY0-NEXT: [[TMP2373:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2373]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4392]] +// SIMD-ONLY0: if.end4392: +// 
SIMD-ONLY0-NEXT: [[TMP2374:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2375:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4393:%.*]] = icmp sgt i64 [[TMP2374]], [[TMP2375]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4393]], label [[IF_THEN4395:%.*]], label [[IF_END4396:%.*]] +// SIMD-ONLY0: if.then4395: +// SIMD-ONLY0-NEXT: [[TMP2376:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2376]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4396]] +// SIMD-ONLY0: if.end4396: +// SIMD-ONLY0-NEXT: [[TMP2377:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2378:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4397:%.*]] = icmp slt i64 [[TMP2377]], [[TMP2378]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4397]], label [[IF_THEN4399:%.*]], label [[IF_END4400:%.*]] +// SIMD-ONLY0: if.then4399: +// SIMD-ONLY0-NEXT: [[TMP2379:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2379]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4400]] +// SIMD-ONLY0: if.end4400: +// SIMD-ONLY0-NEXT: [[TMP2380:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2381:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4401:%.*]] = icmp eq i64 [[TMP2380]], [[TMP2381]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4401]], label [[COND_TRUE4403:%.*]], label [[COND_FALSE4404:%.*]] +// SIMD-ONLY0: cond.true4403: +// SIMD-ONLY0-NEXT: [[TMP2382:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4405:%.*]] +// SIMD-ONLY0: cond.false4404: +// SIMD-ONLY0-NEXT: [[TMP2383:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4405]] +// SIMD-ONLY0: cond.end4405: +// SIMD-ONLY0-NEXT: [[COND4406:%.*]] = phi i64 [ [[TMP2382]], [[COND_TRUE4403]] ], [ [[TMP2383]], [[COND_FALSE4404]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4406]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2384:%.*]] = load i64, ptr [[LLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP2385:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4407:%.*]] = icmp eq i64 [[TMP2384]], [[TMP2385]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4407]], label [[COND_TRUE4409:%.*]], label [[COND_FALSE4410:%.*]] +// SIMD-ONLY0: cond.true4409: +// SIMD-ONLY0-NEXT: [[TMP2386:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4411:%.*]] +// SIMD-ONLY0: cond.false4410: +// SIMD-ONLY0-NEXT: [[TMP2387:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4411]] +// SIMD-ONLY0: cond.end4411: +// SIMD-ONLY0-NEXT: [[COND4412:%.*]] = phi i64 [ [[TMP2386]], [[COND_TRUE4409]] ], [ [[TMP2387]], [[COND_FALSE4410]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4412]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2388:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2389:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4413:%.*]] = icmp eq i64 [[TMP2388]], [[TMP2389]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4413]], label [[IF_THEN4415:%.*]], label [[IF_END4416:%.*]] +// SIMD-ONLY0: if.then4415: +// SIMD-ONLY0-NEXT: [[TMP2390:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2390]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4416]] +// SIMD-ONLY0: if.end4416: +// SIMD-ONLY0-NEXT: [[TMP2391:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2392:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4417:%.*]] = icmp eq i64 [[TMP2391]], [[TMP2392]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4417]], label [[IF_THEN4419:%.*]], label [[IF_END4420:%.*]] +// SIMD-ONLY0: if.then4419: +// SIMD-ONLY0-NEXT: [[TMP2393:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2393]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4420]] +// SIMD-ONLY0: if.end4420: +// SIMD-ONLY0-NEXT: [[TMP2394:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2395:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP4421:%.*]] = icmp ugt i64 [[TMP2394]], [[TMP2395]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4421]], label [[COND_TRUE4423:%.*]], label [[COND_FALSE4424:%.*]] +// SIMD-ONLY0: cond.true4423: +// SIMD-ONLY0-NEXT: [[TMP2396:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4425:%.*]] +// SIMD-ONLY0: cond.false4424: +// SIMD-ONLY0-NEXT: [[TMP2397:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4425]] +// SIMD-ONLY0: cond.end4425: +// SIMD-ONLY0-NEXT: [[COND4426:%.*]] = phi i64 [ [[TMP2396]], [[COND_TRUE4423]] ], [ [[TMP2397]], [[COND_FALSE4424]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4426]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2398:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2399:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4427:%.*]] = icmp ult i64 [[TMP2398]], [[TMP2399]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4427]], label [[COND_TRUE4429:%.*]], label [[COND_FALSE4430:%.*]] +// SIMD-ONLY0: cond.true4429: +// SIMD-ONLY0-NEXT: [[TMP2400:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4431:%.*]] +// SIMD-ONLY0: cond.false4430: +// SIMD-ONLY0-NEXT: [[TMP2401:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4431]] +// SIMD-ONLY0: cond.end4431: +// SIMD-ONLY0-NEXT: [[COND4432:%.*]] = phi i64 [ [[TMP2400]], [[COND_TRUE4429]] ], [ [[TMP2401]], [[COND_FALSE4430]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4432]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2402:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2403:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4433:%.*]] = icmp ugt i64 [[TMP2402]], [[TMP2403]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4433]], label [[COND_TRUE4435:%.*]], label [[COND_FALSE4436:%.*]] +// SIMD-ONLY0: cond.true4435: +// SIMD-ONLY0-NEXT: [[TMP2404:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4437:%.*]] 
+// SIMD-ONLY0: cond.false4436: +// SIMD-ONLY0-NEXT: [[TMP2405:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4437]] +// SIMD-ONLY0: cond.end4437: +// SIMD-ONLY0-NEXT: [[COND4438:%.*]] = phi i64 [ [[TMP2404]], [[COND_TRUE4435]] ], [ [[TMP2405]], [[COND_FALSE4436]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4438]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2406:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2407:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4439:%.*]] = icmp ult i64 [[TMP2406]], [[TMP2407]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4439]], label [[COND_TRUE4441:%.*]], label [[COND_FALSE4442:%.*]] +// SIMD-ONLY0: cond.true4441: +// SIMD-ONLY0-NEXT: [[TMP2408:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4443:%.*]] +// SIMD-ONLY0: cond.false4442: +// SIMD-ONLY0-NEXT: [[TMP2409:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4443]] +// SIMD-ONLY0: cond.end4443: +// SIMD-ONLY0-NEXT: [[COND4444:%.*]] = phi i64 [ [[TMP2408]], [[COND_TRUE4441]] ], [ [[TMP2409]], [[COND_FALSE4442]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4444]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2410:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2411:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4445:%.*]] = icmp ugt i64 [[TMP2410]], [[TMP2411]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4445]], label [[IF_THEN4447:%.*]], label [[IF_END4448:%.*]] +// SIMD-ONLY0: if.then4447: +// SIMD-ONLY0-NEXT: [[TMP2412:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2412]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4448]] +// SIMD-ONLY0: if.end4448: +// SIMD-ONLY0-NEXT: [[TMP2413:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2414:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4449:%.*]] = icmp ult i64 [[TMP2413]], [[TMP2414]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP4449]], label [[IF_THEN4451:%.*]], label [[IF_END4452:%.*]] +// SIMD-ONLY0: if.then4451: +// SIMD-ONLY0-NEXT: [[TMP2415:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2415]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4452]] +// SIMD-ONLY0: if.end4452: +// SIMD-ONLY0-NEXT: [[TMP2416:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2417:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4453:%.*]] = icmp ugt i64 [[TMP2416]], [[TMP2417]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4453]], label [[IF_THEN4455:%.*]], label [[IF_END4456:%.*]] +// SIMD-ONLY0: if.then4455: +// SIMD-ONLY0-NEXT: [[TMP2418:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2418]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4456]] +// SIMD-ONLY0: if.end4456: +// SIMD-ONLY0-NEXT: [[TMP2419:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2420:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4457:%.*]] = icmp ult i64 [[TMP2419]], [[TMP2420]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4457]], label [[IF_THEN4459:%.*]], label [[IF_END4460:%.*]] +// SIMD-ONLY0: if.then4459: +// SIMD-ONLY0-NEXT: [[TMP2421:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2421]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4460]] +// SIMD-ONLY0: if.end4460: +// SIMD-ONLY0-NEXT: [[TMP2422:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2423:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4461:%.*]] = icmp eq i64 [[TMP2422]], [[TMP2423]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4461]], label [[COND_TRUE4463:%.*]], label [[COND_FALSE4464:%.*]] +// SIMD-ONLY0: cond.true4463: +// SIMD-ONLY0-NEXT: [[TMP2424:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4465:%.*]] +// SIMD-ONLY0: cond.false4464: +// SIMD-ONLY0-NEXT: [[TMP2425:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br 
label [[COND_END4465]] +// SIMD-ONLY0: cond.end4465: +// SIMD-ONLY0-NEXT: [[COND4466:%.*]] = phi i64 [ [[TMP2424]], [[COND_TRUE4463]] ], [ [[TMP2425]], [[COND_FALSE4464]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4466]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2426:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2427:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4467:%.*]] = icmp eq i64 [[TMP2426]], [[TMP2427]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4467]], label [[COND_TRUE4469:%.*]], label [[COND_FALSE4470:%.*]] +// SIMD-ONLY0: cond.true4469: +// SIMD-ONLY0-NEXT: [[TMP2428:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4471:%.*]] +// SIMD-ONLY0: cond.false4470: +// SIMD-ONLY0-NEXT: [[TMP2429:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4471]] +// SIMD-ONLY0: cond.end4471: +// SIMD-ONLY0-NEXT: [[COND4472:%.*]] = phi i64 [ [[TMP2428]], [[COND_TRUE4469]] ], [ [[TMP2429]], [[COND_FALSE4470]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4472]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2430:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2431:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4473:%.*]] = icmp eq i64 [[TMP2430]], [[TMP2431]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4473]], label [[IF_THEN4475:%.*]], label [[IF_END4476:%.*]] +// SIMD-ONLY0: if.then4475: +// SIMD-ONLY0-NEXT: [[TMP2432:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2432]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4476]] +// SIMD-ONLY0: if.end4476: +// SIMD-ONLY0-NEXT: [[TMP2433:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2434:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4477:%.*]] = icmp eq i64 [[TMP2433]], [[TMP2434]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4477]], label [[IF_THEN4479:%.*]], label [[IF_END4480:%.*]] +// SIMD-ONLY0: if.then4479: +// SIMD-ONLY0-NEXT: 
[[TMP2435:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2435]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4480]] +// SIMD-ONLY0: if.end4480: +// SIMD-ONLY0-NEXT: [[TMP2436:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2437:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4481:%.*]] = icmp sgt i64 [[TMP2436]], [[TMP2437]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4481]], label [[COND_TRUE4483:%.*]], label [[COND_FALSE4484:%.*]] +// SIMD-ONLY0: cond.true4483: +// SIMD-ONLY0-NEXT: [[TMP2438:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4485:%.*]] +// SIMD-ONLY0: cond.false4484: +// SIMD-ONLY0-NEXT: [[TMP2439:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4485]] +// SIMD-ONLY0: cond.end4485: +// SIMD-ONLY0-NEXT: [[COND4486:%.*]] = phi i64 [ [[TMP2438]], [[COND_TRUE4483]] ], [ [[TMP2439]], [[COND_FALSE4484]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4486]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2440:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2441:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4487:%.*]] = icmp slt i64 [[TMP2440]], [[TMP2441]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4487]], label [[COND_TRUE4489:%.*]], label [[COND_FALSE4490:%.*]] +// SIMD-ONLY0: cond.true4489: +// SIMD-ONLY0-NEXT: [[TMP2442:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4491:%.*]] +// SIMD-ONLY0: cond.false4490: +// SIMD-ONLY0-NEXT: [[TMP2443:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4491]] +// SIMD-ONLY0: cond.end4491: +// SIMD-ONLY0-NEXT: [[COND4492:%.*]] = phi i64 [ [[TMP2442]], [[COND_TRUE4489]] ], [ [[TMP2443]], [[COND_FALSE4490]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4492]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2444:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2445:%.*]] = load i64, ptr [[LLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP4493:%.*]] = icmp sgt i64 [[TMP2444]], [[TMP2445]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4493]], label [[COND_TRUE4495:%.*]], label [[COND_FALSE4496:%.*]] +// SIMD-ONLY0: cond.true4495: +// SIMD-ONLY0-NEXT: [[TMP2446:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4497:%.*]] +// SIMD-ONLY0: cond.false4496: +// SIMD-ONLY0-NEXT: [[TMP2447:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4497]] +// SIMD-ONLY0: cond.end4497: +// SIMD-ONLY0-NEXT: [[COND4498:%.*]] = phi i64 [ [[TMP2446]], [[COND_TRUE4495]] ], [ [[TMP2447]], [[COND_FALSE4496]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4498]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2448:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2449:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4499:%.*]] = icmp slt i64 [[TMP2448]], [[TMP2449]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4499]], label [[COND_TRUE4501:%.*]], label [[COND_FALSE4502:%.*]] +// SIMD-ONLY0: cond.true4501: +// SIMD-ONLY0-NEXT: [[TMP2450:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4503:%.*]] +// SIMD-ONLY0: cond.false4502: +// SIMD-ONLY0-NEXT: [[TMP2451:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4503]] +// SIMD-ONLY0: cond.end4503: +// SIMD-ONLY0-NEXT: [[COND4504:%.*]] = phi i64 [ [[TMP2450]], [[COND_TRUE4501]] ], [ [[TMP2451]], [[COND_FALSE4502]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4504]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2452:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2453:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4505:%.*]] = icmp sgt i64 [[TMP2452]], [[TMP2453]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4505]], label [[IF_THEN4507:%.*]], label [[IF_END4508:%.*]] +// SIMD-ONLY0: if.then4507: +// SIMD-ONLY0-NEXT: [[TMP2454:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2454]], ptr [[LLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4508]] +// SIMD-ONLY0: if.end4508: +// SIMD-ONLY0-NEXT: [[TMP2455:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2456:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4509:%.*]] = icmp slt i64 [[TMP2455]], [[TMP2456]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4509]], label [[IF_THEN4511:%.*]], label [[IF_END4512:%.*]] +// SIMD-ONLY0: if.then4511: +// SIMD-ONLY0-NEXT: [[TMP2457:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2457]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4512]] +// SIMD-ONLY0: if.end4512: +// SIMD-ONLY0-NEXT: [[TMP2458:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2459:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4513:%.*]] = icmp sgt i64 [[TMP2458]], [[TMP2459]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4513]], label [[IF_THEN4515:%.*]], label [[IF_END4516:%.*]] +// SIMD-ONLY0: if.then4515: +// SIMD-ONLY0-NEXT: [[TMP2460:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2460]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4516]] +// SIMD-ONLY0: if.end4516: +// SIMD-ONLY0-NEXT: [[TMP2461:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2462:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4517:%.*]] = icmp slt i64 [[TMP2461]], [[TMP2462]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4517]], label [[IF_THEN4519:%.*]], label [[IF_END4520:%.*]] +// SIMD-ONLY0: if.then4519: +// SIMD-ONLY0-NEXT: [[TMP2463:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2463]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4520]] +// SIMD-ONLY0: if.end4520: +// SIMD-ONLY0-NEXT: [[TMP2464:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2465:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4521:%.*]] = icmp eq i64 [[TMP2464]], [[TMP2465]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4521]], label [[COND_TRUE4523:%.*]], label 
[[COND_FALSE4524:%.*]] +// SIMD-ONLY0: cond.true4523: +// SIMD-ONLY0-NEXT: [[TMP2466:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4525:%.*]] +// SIMD-ONLY0: cond.false4524: +// SIMD-ONLY0-NEXT: [[TMP2467:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4525]] +// SIMD-ONLY0: cond.end4525: +// SIMD-ONLY0-NEXT: [[COND4526:%.*]] = phi i64 [ [[TMP2466]], [[COND_TRUE4523]] ], [ [[TMP2467]], [[COND_FALSE4524]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4526]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2468:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2469:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4527:%.*]] = icmp eq i64 [[TMP2468]], [[TMP2469]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4527]], label [[COND_TRUE4529:%.*]], label [[COND_FALSE4530:%.*]] +// SIMD-ONLY0: cond.true4529: +// SIMD-ONLY0-NEXT: [[TMP2470:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4531:%.*]] +// SIMD-ONLY0: cond.false4530: +// SIMD-ONLY0-NEXT: [[TMP2471:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4531]] +// SIMD-ONLY0: cond.end4531: +// SIMD-ONLY0-NEXT: [[COND4532:%.*]] = phi i64 [ [[TMP2470]], [[COND_TRUE4529]] ], [ [[TMP2471]], [[COND_FALSE4530]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4532]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2472:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2473:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4533:%.*]] = icmp eq i64 [[TMP2472]], [[TMP2473]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4533]], label [[IF_THEN4535:%.*]], label [[IF_END4536:%.*]] +// SIMD-ONLY0: if.then4535: +// SIMD-ONLY0-NEXT: [[TMP2474:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2474]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4536]] +// SIMD-ONLY0: if.end4536: +// SIMD-ONLY0-NEXT: [[TMP2475:%.*]] = load i64, ptr [[LLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP2476:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4537:%.*]] = icmp eq i64 [[TMP2475]], [[TMP2476]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4537]], label [[IF_THEN4539:%.*]], label [[IF_END4540:%.*]] +// SIMD-ONLY0: if.then4539: +// SIMD-ONLY0-NEXT: [[TMP2477:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2477]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4540]] +// SIMD-ONLY0: if.end4540: +// SIMD-ONLY0-NEXT: [[TMP2478:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2479:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4541:%.*]] = icmp ugt i64 [[TMP2478]], [[TMP2479]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4541]], label [[COND_TRUE4543:%.*]], label [[COND_FALSE4544:%.*]] +// SIMD-ONLY0: cond.true4543: +// SIMD-ONLY0-NEXT: [[TMP2480:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4545:%.*]] +// SIMD-ONLY0: cond.false4544: +// SIMD-ONLY0-NEXT: [[TMP2481:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4545]] +// SIMD-ONLY0: cond.end4545: +// SIMD-ONLY0-NEXT: [[COND4546:%.*]] = phi i64 [ [[TMP2480]], [[COND_TRUE4543]] ], [ [[TMP2481]], [[COND_FALSE4544]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4546]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2482:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2483:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4547:%.*]] = icmp ult i64 [[TMP2482]], [[TMP2483]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4547]], label [[COND_TRUE4549:%.*]], label [[COND_FALSE4550:%.*]] +// SIMD-ONLY0: cond.true4549: +// SIMD-ONLY0-NEXT: [[TMP2484:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4551:%.*]] +// SIMD-ONLY0: cond.false4550: +// SIMD-ONLY0-NEXT: [[TMP2485:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4551]] +// SIMD-ONLY0: cond.end4551: +// SIMD-ONLY0-NEXT: [[COND4552:%.*]] = 
phi i64 [ [[TMP2484]], [[COND_TRUE4549]] ], [ [[TMP2485]], [[COND_FALSE4550]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4552]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2486:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2487:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4553:%.*]] = icmp ugt i64 [[TMP2486]], [[TMP2487]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4553]], label [[COND_TRUE4555:%.*]], label [[COND_FALSE4556:%.*]] +// SIMD-ONLY0: cond.true4555: +// SIMD-ONLY0-NEXT: [[TMP2488:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4557:%.*]] +// SIMD-ONLY0: cond.false4556: +// SIMD-ONLY0-NEXT: [[TMP2489:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4557]] +// SIMD-ONLY0: cond.end4557: +// SIMD-ONLY0-NEXT: [[COND4558:%.*]] = phi i64 [ [[TMP2488]], [[COND_TRUE4555]] ], [ [[TMP2489]], [[COND_FALSE4556]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4558]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2490:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2491:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4559:%.*]] = icmp ult i64 [[TMP2490]], [[TMP2491]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4559]], label [[COND_TRUE4561:%.*]], label [[COND_FALSE4562:%.*]] +// SIMD-ONLY0: cond.true4561: +// SIMD-ONLY0-NEXT: [[TMP2492:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4563:%.*]] +// SIMD-ONLY0: cond.false4562: +// SIMD-ONLY0-NEXT: [[TMP2493:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4563]] +// SIMD-ONLY0: cond.end4563: +// SIMD-ONLY0-NEXT: [[COND4564:%.*]] = phi i64 [ [[TMP2492]], [[COND_TRUE4561]] ], [ [[TMP2493]], [[COND_FALSE4562]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4564]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2494:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2495:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4565:%.*]] 
= icmp ugt i64 [[TMP2494]], [[TMP2495]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4565]], label [[IF_THEN4567:%.*]], label [[IF_END4568:%.*]] +// SIMD-ONLY0: if.then4567: +// SIMD-ONLY0-NEXT: [[TMP2496:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2496]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4568]] +// SIMD-ONLY0: if.end4568: +// SIMD-ONLY0-NEXT: [[TMP2497:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2498:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4569:%.*]] = icmp ult i64 [[TMP2497]], [[TMP2498]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4569]], label [[IF_THEN4571:%.*]], label [[IF_END4572:%.*]] +// SIMD-ONLY0: if.then4571: +// SIMD-ONLY0-NEXT: [[TMP2499:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2499]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4572]] +// SIMD-ONLY0: if.end4572: +// SIMD-ONLY0-NEXT: [[TMP2500:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2501:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4573:%.*]] = icmp ugt i64 [[TMP2500]], [[TMP2501]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4573]], label [[IF_THEN4575:%.*]], label [[IF_END4576:%.*]] +// SIMD-ONLY0: if.then4575: +// SIMD-ONLY0-NEXT: [[TMP2502:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2502]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4576]] +// SIMD-ONLY0: if.end4576: +// SIMD-ONLY0-NEXT: [[TMP2503:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2504:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4577:%.*]] = icmp ult i64 [[TMP2503]], [[TMP2504]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4577]], label [[IF_THEN4579:%.*]], label [[IF_END4580:%.*]] +// SIMD-ONLY0: if.then4579: +// SIMD-ONLY0-NEXT: [[TMP2505:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2505]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4580]] +// 
SIMD-ONLY0: if.end4580: +// SIMD-ONLY0-NEXT: [[TMP2506:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2507:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4581:%.*]] = icmp eq i64 [[TMP2506]], [[TMP2507]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4581]], label [[COND_TRUE4583:%.*]], label [[COND_FALSE4584:%.*]] +// SIMD-ONLY0: cond.true4583: +// SIMD-ONLY0-NEXT: [[TMP2508:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4585:%.*]] +// SIMD-ONLY0: cond.false4584: +// SIMD-ONLY0-NEXT: [[TMP2509:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4585]] +// SIMD-ONLY0: cond.end4585: +// SIMD-ONLY0-NEXT: [[COND4586:%.*]] = phi i64 [ [[TMP2508]], [[COND_TRUE4583]] ], [ [[TMP2509]], [[COND_FALSE4584]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4586]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2510:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2511:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4587:%.*]] = icmp eq i64 [[TMP2510]], [[TMP2511]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4587]], label [[COND_TRUE4589:%.*]], label [[COND_FALSE4590:%.*]] +// SIMD-ONLY0: cond.true4589: +// SIMD-ONLY0-NEXT: [[TMP2512:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4591:%.*]] +// SIMD-ONLY0: cond.false4590: +// SIMD-ONLY0-NEXT: [[TMP2513:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4591]] +// SIMD-ONLY0: cond.end4591: +// SIMD-ONLY0-NEXT: [[COND4592:%.*]] = phi i64 [ [[TMP2512]], [[COND_TRUE4589]] ], [ [[TMP2513]], [[COND_FALSE4590]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND4592]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2514:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2515:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4593:%.*]] = icmp eq i64 [[TMP2514]], [[TMP2515]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4593]], label [[IF_THEN4595:%.*]], label 
[[IF_END4596:%.*]] +// SIMD-ONLY0: if.then4595: +// SIMD-ONLY0-NEXT: [[TMP2516:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2516]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4596]] +// SIMD-ONLY0: if.end4596: +// SIMD-ONLY0-NEXT: [[TMP2517:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2518:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4597:%.*]] = icmp eq i64 [[TMP2517]], [[TMP2518]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4597]], label [[IF_THEN4599:%.*]], label [[IF_END4600:%.*]] +// SIMD-ONLY0: if.then4599: +// SIMD-ONLY0-NEXT: [[TMP2519:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2519]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4600]] +// SIMD-ONLY0: if.end4600: +// SIMD-ONLY0-NEXT: [[TMP2520:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2521:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4601:%.*]] = fcmp ogt float [[TMP2520]], [[TMP2521]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4601]], label [[COND_TRUE4603:%.*]], label [[COND_FALSE4604:%.*]] +// SIMD-ONLY0: cond.true4603: +// SIMD-ONLY0-NEXT: [[TMP2522:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4605:%.*]] +// SIMD-ONLY0: cond.false4604: +// SIMD-ONLY0-NEXT: [[TMP2523:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4605]] +// SIMD-ONLY0: cond.end4605: +// SIMD-ONLY0-NEXT: [[COND4606:%.*]] = phi float [ [[TMP2522]], [[COND_TRUE4603]] ], [ [[TMP2523]], [[COND_FALSE4604]] ] +// SIMD-ONLY0-NEXT: store float [[COND4606]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2524:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2525:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4607:%.*]] = fcmp olt float [[TMP2524]], [[TMP2525]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4607]], label [[COND_TRUE4609:%.*]], label [[COND_FALSE4610:%.*]] +// SIMD-ONLY0: cond.true4609: 
+// SIMD-ONLY0-NEXT: [[TMP2526:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4611:%.*]] +// SIMD-ONLY0: cond.false4610: +// SIMD-ONLY0-NEXT: [[TMP2527:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4611]] +// SIMD-ONLY0: cond.end4611: +// SIMD-ONLY0-NEXT: [[COND4612:%.*]] = phi float [ [[TMP2526]], [[COND_TRUE4609]] ], [ [[TMP2527]], [[COND_FALSE4610]] ] +// SIMD-ONLY0-NEXT: store float [[COND4612]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2528:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2529:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4613:%.*]] = fcmp ogt float [[TMP2528]], [[TMP2529]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4613]], label [[COND_TRUE4615:%.*]], label [[COND_FALSE4616:%.*]] +// SIMD-ONLY0: cond.true4615: +// SIMD-ONLY0-NEXT: [[TMP2530:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4617:%.*]] +// SIMD-ONLY0: cond.false4616: +// SIMD-ONLY0-NEXT: [[TMP2531:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4617]] +// SIMD-ONLY0: cond.end4617: +// SIMD-ONLY0-NEXT: [[COND4618:%.*]] = phi float [ [[TMP2530]], [[COND_TRUE4615]] ], [ [[TMP2531]], [[COND_FALSE4616]] ] +// SIMD-ONLY0-NEXT: store float [[COND4618]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2532:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2533:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4619:%.*]] = fcmp olt float [[TMP2532]], [[TMP2533]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4619]], label [[COND_TRUE4621:%.*]], label [[COND_FALSE4622:%.*]] +// SIMD-ONLY0: cond.true4621: +// SIMD-ONLY0-NEXT: [[TMP2534:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4623:%.*]] +// SIMD-ONLY0: cond.false4622: +// SIMD-ONLY0-NEXT: [[TMP2535:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4623]] +// SIMD-ONLY0: cond.end4623: +// 
SIMD-ONLY0-NEXT: [[COND4624:%.*]] = phi float [ [[TMP2534]], [[COND_TRUE4621]] ], [ [[TMP2535]], [[COND_FALSE4622]] ] +// SIMD-ONLY0-NEXT: store float [[COND4624]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2536:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2537:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4625:%.*]] = fcmp ogt float [[TMP2536]], [[TMP2537]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4625]], label [[IF_THEN4627:%.*]], label [[IF_END4628:%.*]] +// SIMD-ONLY0: if.then4627: +// SIMD-ONLY0-NEXT: [[TMP2538:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2538]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4628]] +// SIMD-ONLY0: if.end4628: +// SIMD-ONLY0-NEXT: [[TMP2539:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2540:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4629:%.*]] = fcmp olt float [[TMP2539]], [[TMP2540]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4629]], label [[IF_THEN4631:%.*]], label [[IF_END4632:%.*]] +// SIMD-ONLY0: if.then4631: +// SIMD-ONLY0-NEXT: [[TMP2541:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2541]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4632]] +// SIMD-ONLY0: if.end4632: +// SIMD-ONLY0-NEXT: [[TMP2542:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2543:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4633:%.*]] = fcmp ogt float [[TMP2542]], [[TMP2543]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4633]], label [[IF_THEN4635:%.*]], label [[IF_END4636:%.*]] +// SIMD-ONLY0: if.then4635: +// SIMD-ONLY0-NEXT: [[TMP2544:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2544]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4636]] +// SIMD-ONLY0: if.end4636: +// SIMD-ONLY0-NEXT: [[TMP2545:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2546:%.*]] = load float, ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: [[CMP4637:%.*]] = fcmp olt float [[TMP2545]], [[TMP2546]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4637]], label [[IF_THEN4639:%.*]], label [[IF_END4640:%.*]] +// SIMD-ONLY0: if.then4639: +// SIMD-ONLY0-NEXT: [[TMP2547:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2547]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4640]] +// SIMD-ONLY0: if.end4640: +// SIMD-ONLY0-NEXT: [[TMP2548:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2549:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4641:%.*]] = fcmp ogt float [[TMP2548]], [[TMP2549]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4641]], label [[COND_TRUE4643:%.*]], label [[COND_FALSE4644:%.*]] +// SIMD-ONLY0: cond.true4643: +// SIMD-ONLY0-NEXT: [[TMP2550:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4645:%.*]] +// SIMD-ONLY0: cond.false4644: +// SIMD-ONLY0-NEXT: [[TMP2551:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4645]] +// SIMD-ONLY0: cond.end4645: +// SIMD-ONLY0-NEXT: [[COND4646:%.*]] = phi float [ [[TMP2550]], [[COND_TRUE4643]] ], [ [[TMP2551]], [[COND_FALSE4644]] ] +// SIMD-ONLY0-NEXT: store float [[COND4646]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2552:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2553:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4647:%.*]] = fcmp olt float [[TMP2552]], [[TMP2553]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4647]], label [[COND_TRUE4649:%.*]], label [[COND_FALSE4650:%.*]] +// SIMD-ONLY0: cond.true4649: +// SIMD-ONLY0-NEXT: [[TMP2554:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4651:%.*]] +// SIMD-ONLY0: cond.false4650: +// SIMD-ONLY0-NEXT: [[TMP2555:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4651]] +// SIMD-ONLY0: cond.end4651: +// SIMD-ONLY0-NEXT: [[COND4652:%.*]] = phi float [ [[TMP2554]], [[COND_TRUE4649]] ], [ [[TMP2555]], 
[[COND_FALSE4650]] ] +// SIMD-ONLY0-NEXT: store float [[COND4652]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2556:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2557:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4653:%.*]] = fcmp ogt float [[TMP2556]], [[TMP2557]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4653]], label [[COND_TRUE4655:%.*]], label [[COND_FALSE4656:%.*]] +// SIMD-ONLY0: cond.true4655: +// SIMD-ONLY0-NEXT: [[TMP2558:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4657:%.*]] +// SIMD-ONLY0: cond.false4656: +// SIMD-ONLY0-NEXT: [[TMP2559:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4657]] +// SIMD-ONLY0: cond.end4657: +// SIMD-ONLY0-NEXT: [[COND4658:%.*]] = phi float [ [[TMP2558]], [[COND_TRUE4655]] ], [ [[TMP2559]], [[COND_FALSE4656]] ] +// SIMD-ONLY0-NEXT: store float [[COND4658]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2560:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2561:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4659:%.*]] = fcmp olt float [[TMP2560]], [[TMP2561]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4659]], label [[COND_TRUE4661:%.*]], label [[COND_FALSE4662:%.*]] +// SIMD-ONLY0: cond.true4661: +// SIMD-ONLY0-NEXT: [[TMP2562:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4663:%.*]] +// SIMD-ONLY0: cond.false4662: +// SIMD-ONLY0-NEXT: [[TMP2563:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4663]] +// SIMD-ONLY0: cond.end4663: +// SIMD-ONLY0-NEXT: [[COND4664:%.*]] = phi float [ [[TMP2562]], [[COND_TRUE4661]] ], [ [[TMP2563]], [[COND_FALSE4662]] ] +// SIMD-ONLY0-NEXT: store float [[COND4664]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2564:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2565:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4665:%.*]] = fcmp ogt float [[TMP2564]], [[TMP2565]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP4665]], label [[IF_THEN4667:%.*]], label [[IF_END4668:%.*]] +// SIMD-ONLY0: if.then4667: +// SIMD-ONLY0-NEXT: [[TMP2566:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2566]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4668]] +// SIMD-ONLY0: if.end4668: +// SIMD-ONLY0-NEXT: [[TMP2567:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2568:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4669:%.*]] = fcmp olt float [[TMP2567]], [[TMP2568]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4669]], label [[IF_THEN4671:%.*]], label [[IF_END4672:%.*]] +// SIMD-ONLY0: if.then4671: +// SIMD-ONLY0-NEXT: [[TMP2569:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2569]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4672]] +// SIMD-ONLY0: if.end4672: +// SIMD-ONLY0-NEXT: [[TMP2570:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2571:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4673:%.*]] = fcmp ogt float [[TMP2570]], [[TMP2571]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4673]], label [[IF_THEN4675:%.*]], label [[IF_END4676:%.*]] +// SIMD-ONLY0: if.then4675: +// SIMD-ONLY0-NEXT: [[TMP2572:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2572]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4676]] +// SIMD-ONLY0: if.end4676: +// SIMD-ONLY0-NEXT: [[TMP2573:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2574:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4677:%.*]] = fcmp olt float [[TMP2573]], [[TMP2574]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4677]], label [[IF_THEN4679:%.*]], label [[IF_END4680:%.*]] +// SIMD-ONLY0: if.then4679: +// SIMD-ONLY0-NEXT: [[TMP2575:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2575]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4680]] +// SIMD-ONLY0: if.end4680: +// 
SIMD-ONLY0-NEXT: [[TMP2576:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2577:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4681:%.*]] = fcmp ogt float [[TMP2576]], [[TMP2577]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4681]], label [[COND_TRUE4683:%.*]], label [[COND_FALSE4684:%.*]] +// SIMD-ONLY0: cond.true4683: +// SIMD-ONLY0-NEXT: [[TMP2578:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4685:%.*]] +// SIMD-ONLY0: cond.false4684: +// SIMD-ONLY0-NEXT: [[TMP2579:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4685]] +// SIMD-ONLY0: cond.end4685: +// SIMD-ONLY0-NEXT: [[COND4686:%.*]] = phi float [ [[TMP2578]], [[COND_TRUE4683]] ], [ [[TMP2579]], [[COND_FALSE4684]] ] +// SIMD-ONLY0-NEXT: store float [[COND4686]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2580:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2581:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4687:%.*]] = fcmp olt float [[TMP2580]], [[TMP2581]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4687]], label [[COND_TRUE4689:%.*]], label [[COND_FALSE4690:%.*]] +// SIMD-ONLY0: cond.true4689: +// SIMD-ONLY0-NEXT: [[TMP2582:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4691:%.*]] +// SIMD-ONLY0: cond.false4690: +// SIMD-ONLY0-NEXT: [[TMP2583:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4691]] +// SIMD-ONLY0: cond.end4691: +// SIMD-ONLY0-NEXT: [[COND4692:%.*]] = phi float [ [[TMP2582]], [[COND_TRUE4689]] ], [ [[TMP2583]], [[COND_FALSE4690]] ] +// SIMD-ONLY0-NEXT: store float [[COND4692]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2584:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2585:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4693:%.*]] = fcmp ogt float [[TMP2584]], [[TMP2585]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4693]], label [[COND_TRUE4695:%.*]], label [[COND_FALSE4696:%.*]] 
+// SIMD-ONLY0: cond.true4695: +// SIMD-ONLY0-NEXT: [[TMP2586:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4697:%.*]] +// SIMD-ONLY0: cond.false4696: +// SIMD-ONLY0-NEXT: [[TMP2587:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4697]] +// SIMD-ONLY0: cond.end4697: +// SIMD-ONLY0-NEXT: [[COND4698:%.*]] = phi float [ [[TMP2586]], [[COND_TRUE4695]] ], [ [[TMP2587]], [[COND_FALSE4696]] ] +// SIMD-ONLY0-NEXT: store float [[COND4698]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2588:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2589:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4699:%.*]] = fcmp olt float [[TMP2588]], [[TMP2589]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4699]], label [[COND_TRUE4701:%.*]], label [[COND_FALSE4702:%.*]] +// SIMD-ONLY0: cond.true4701: +// SIMD-ONLY0-NEXT: [[TMP2590:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4703:%.*]] +// SIMD-ONLY0: cond.false4702: +// SIMD-ONLY0-NEXT: [[TMP2591:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4703]] +// SIMD-ONLY0: cond.end4703: +// SIMD-ONLY0-NEXT: [[COND4704:%.*]] = phi float [ [[TMP2590]], [[COND_TRUE4701]] ], [ [[TMP2591]], [[COND_FALSE4702]] ] +// SIMD-ONLY0-NEXT: store float [[COND4704]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2592:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2593:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4705:%.*]] = fcmp ogt float [[TMP2592]], [[TMP2593]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4705]], label [[IF_THEN4707:%.*]], label [[IF_END4708:%.*]] +// SIMD-ONLY0: if.then4707: +// SIMD-ONLY0-NEXT: [[TMP2594:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2594]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4708]] +// SIMD-ONLY0: if.end4708: +// SIMD-ONLY0-NEXT: [[TMP2595:%.*]] = load float, ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP2596:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4709:%.*]] = fcmp olt float [[TMP2595]], [[TMP2596]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4709]], label [[IF_THEN4711:%.*]], label [[IF_END4712:%.*]] +// SIMD-ONLY0: if.then4711: +// SIMD-ONLY0-NEXT: [[TMP2597:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2597]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4712]] +// SIMD-ONLY0: if.end4712: +// SIMD-ONLY0-NEXT: [[TMP2598:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2599:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4713:%.*]] = fcmp ogt float [[TMP2598]], [[TMP2599]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4713]], label [[IF_THEN4715:%.*]], label [[IF_END4716:%.*]] +// SIMD-ONLY0: if.then4715: +// SIMD-ONLY0-NEXT: [[TMP2600:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2600]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4716]] +// SIMD-ONLY0: if.end4716: +// SIMD-ONLY0-NEXT: [[TMP2601:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2602:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4717:%.*]] = fcmp olt float [[TMP2601]], [[TMP2602]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4717]], label [[IF_THEN4719:%.*]], label [[IF_END4720:%.*]] +// SIMD-ONLY0: if.then4719: +// SIMD-ONLY0-NEXT: [[TMP2603:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2603]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4720]] +// SIMD-ONLY0: if.end4720: +// SIMD-ONLY0-NEXT: [[TMP2604:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2605:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4721:%.*]] = fcmp ogt float [[TMP2604]], [[TMP2605]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4721]], label [[COND_TRUE4723:%.*]], label [[COND_FALSE4724:%.*]] +// SIMD-ONLY0: cond.true4723: +// SIMD-ONLY0-NEXT: [[TMP2606:%.*]] = load float, ptr [[FE]], align 
4 +// SIMD-ONLY0-NEXT: br label [[COND_END4725:%.*]] +// SIMD-ONLY0: cond.false4724: +// SIMD-ONLY0-NEXT: [[TMP2607:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4725]] +// SIMD-ONLY0: cond.end4725: +// SIMD-ONLY0-NEXT: [[COND4726:%.*]] = phi float [ [[TMP2606]], [[COND_TRUE4723]] ], [ [[TMP2607]], [[COND_FALSE4724]] ] +// SIMD-ONLY0-NEXT: store float [[COND4726]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2608:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2609:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4727:%.*]] = fcmp olt float [[TMP2608]], [[TMP2609]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4727]], label [[COND_TRUE4729:%.*]], label [[COND_FALSE4730:%.*]] +// SIMD-ONLY0: cond.true4729: +// SIMD-ONLY0-NEXT: [[TMP2610:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4731:%.*]] +// SIMD-ONLY0: cond.false4730: +// SIMD-ONLY0-NEXT: [[TMP2611:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4731]] +// SIMD-ONLY0: cond.end4731: +// SIMD-ONLY0-NEXT: [[COND4732:%.*]] = phi float [ [[TMP2610]], [[COND_TRUE4729]] ], [ [[TMP2611]], [[COND_FALSE4730]] ] +// SIMD-ONLY0-NEXT: store float [[COND4732]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2612:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2613:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4733:%.*]] = fcmp ogt float [[TMP2612]], [[TMP2613]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4733]], label [[COND_TRUE4735:%.*]], label [[COND_FALSE4736:%.*]] +// SIMD-ONLY0: cond.true4735: +// SIMD-ONLY0-NEXT: [[TMP2614:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4737:%.*]] +// SIMD-ONLY0: cond.false4736: +// SIMD-ONLY0-NEXT: [[TMP2615:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4737]] +// SIMD-ONLY0: cond.end4737: +// SIMD-ONLY0-NEXT: [[COND4738:%.*]] = phi float [ [[TMP2614]], [[COND_TRUE4735]] 
], [ [[TMP2615]], [[COND_FALSE4736]] ] +// SIMD-ONLY0-NEXT: store float [[COND4738]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2616:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2617:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4739:%.*]] = fcmp olt float [[TMP2616]], [[TMP2617]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4739]], label [[COND_TRUE4741:%.*]], label [[COND_FALSE4742:%.*]] +// SIMD-ONLY0: cond.true4741: +// SIMD-ONLY0-NEXT: [[TMP2618:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4743:%.*]] +// SIMD-ONLY0: cond.false4742: +// SIMD-ONLY0-NEXT: [[TMP2619:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4743]] +// SIMD-ONLY0: cond.end4743: +// SIMD-ONLY0-NEXT: [[COND4744:%.*]] = phi float [ [[TMP2618]], [[COND_TRUE4741]] ], [ [[TMP2619]], [[COND_FALSE4742]] ] +// SIMD-ONLY0-NEXT: store float [[COND4744]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2620:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2621:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4745:%.*]] = fcmp ogt float [[TMP2620]], [[TMP2621]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4745]], label [[IF_THEN4747:%.*]], label [[IF_END4748:%.*]] +// SIMD-ONLY0: if.then4747: +// SIMD-ONLY0-NEXT: [[TMP2622:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2622]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4748]] +// SIMD-ONLY0: if.end4748: +// SIMD-ONLY0-NEXT: [[TMP2623:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2624:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4749:%.*]] = fcmp olt float [[TMP2623]], [[TMP2624]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4749]], label [[IF_THEN4751:%.*]], label [[IF_END4752:%.*]] +// SIMD-ONLY0: if.then4751: +// SIMD-ONLY0-NEXT: [[TMP2625:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2625]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: 
br label [[IF_END4752]] +// SIMD-ONLY0: if.end4752: +// SIMD-ONLY0-NEXT: [[TMP2626:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2627:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4753:%.*]] = fcmp ogt float [[TMP2626]], [[TMP2627]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4753]], label [[IF_THEN4755:%.*]], label [[IF_END4756:%.*]] +// SIMD-ONLY0: if.then4755: +// SIMD-ONLY0-NEXT: [[TMP2628:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2628]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4756]] +// SIMD-ONLY0: if.end4756: +// SIMD-ONLY0-NEXT: [[TMP2629:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2630:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4757:%.*]] = fcmp olt float [[TMP2629]], [[TMP2630]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4757]], label [[IF_THEN4759:%.*]], label [[IF_END4760:%.*]] +// SIMD-ONLY0: if.then4759: +// SIMD-ONLY0-NEXT: [[TMP2631:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2631]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4760]] +// SIMD-ONLY0: if.end4760: +// SIMD-ONLY0-NEXT: [[TMP2632:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2633:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4761:%.*]] = fcmp ogt float [[TMP2632]], [[TMP2633]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4761]], label [[COND_TRUE4763:%.*]], label [[COND_FALSE4764:%.*]] +// SIMD-ONLY0: cond.true4763: +// SIMD-ONLY0-NEXT: [[TMP2634:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4765:%.*]] +// SIMD-ONLY0: cond.false4764: +// SIMD-ONLY0-NEXT: [[TMP2635:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4765]] +// SIMD-ONLY0: cond.end4765: +// SIMD-ONLY0-NEXT: [[COND4766:%.*]] = phi float [ [[TMP2634]], [[COND_TRUE4763]] ], [ [[TMP2635]], [[COND_FALSE4764]] ] +// SIMD-ONLY0-NEXT: store float [[COND4766]], ptr [[FX]], align 4 
+// SIMD-ONLY0-NEXT: [[TMP2636:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2637:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4767:%.*]] = fcmp olt float [[TMP2636]], [[TMP2637]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4767]], label [[COND_TRUE4769:%.*]], label [[COND_FALSE4770:%.*]] +// SIMD-ONLY0: cond.true4769: +// SIMD-ONLY0-NEXT: [[TMP2638:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4771:%.*]] +// SIMD-ONLY0: cond.false4770: +// SIMD-ONLY0-NEXT: [[TMP2639:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4771]] +// SIMD-ONLY0: cond.end4771: +// SIMD-ONLY0-NEXT: [[COND4772:%.*]] = phi float [ [[TMP2638]], [[COND_TRUE4769]] ], [ [[TMP2639]], [[COND_FALSE4770]] ] +// SIMD-ONLY0-NEXT: store float [[COND4772]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2640:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2641:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4773:%.*]] = fcmp ogt float [[TMP2640]], [[TMP2641]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4773]], label [[COND_TRUE4775:%.*]], label [[COND_FALSE4776:%.*]] +// SIMD-ONLY0: cond.true4775: +// SIMD-ONLY0-NEXT: [[TMP2642:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4777:%.*]] +// SIMD-ONLY0: cond.false4776: +// SIMD-ONLY0-NEXT: [[TMP2643:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4777]] +// SIMD-ONLY0: cond.end4777: +// SIMD-ONLY0-NEXT: [[COND4778:%.*]] = phi float [ [[TMP2642]], [[COND_TRUE4775]] ], [ [[TMP2643]], [[COND_FALSE4776]] ] +// SIMD-ONLY0-NEXT: store float [[COND4778]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2644:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2645:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4779:%.*]] = fcmp olt float [[TMP2644]], [[TMP2645]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4779]], label [[COND_TRUE4781:%.*]], label 
[[COND_FALSE4782:%.*]] +// SIMD-ONLY0: cond.true4781: +// SIMD-ONLY0-NEXT: [[TMP2646:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4783:%.*]] +// SIMD-ONLY0: cond.false4782: +// SIMD-ONLY0-NEXT: [[TMP2647:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4783]] +// SIMD-ONLY0: cond.end4783: +// SIMD-ONLY0-NEXT: [[COND4784:%.*]] = phi float [ [[TMP2646]], [[COND_TRUE4781]] ], [ [[TMP2647]], [[COND_FALSE4782]] ] +// SIMD-ONLY0-NEXT: store float [[COND4784]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2648:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2649:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4785:%.*]] = fcmp ogt float [[TMP2648]], [[TMP2649]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4785]], label [[IF_THEN4787:%.*]], label [[IF_END4788:%.*]] +// SIMD-ONLY0: if.then4787: +// SIMD-ONLY0-NEXT: [[TMP2650:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2650]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4788]] +// SIMD-ONLY0: if.end4788: +// SIMD-ONLY0-NEXT: [[TMP2651:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2652:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4789:%.*]] = fcmp olt float [[TMP2651]], [[TMP2652]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4789]], label [[IF_THEN4791:%.*]], label [[IF_END4792:%.*]] +// SIMD-ONLY0: if.then4791: +// SIMD-ONLY0-NEXT: [[TMP2653:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2653]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4792]] +// SIMD-ONLY0: if.end4792: +// SIMD-ONLY0-NEXT: [[TMP2654:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2655:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4793:%.*]] = fcmp ogt float [[TMP2654]], [[TMP2655]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4793]], label [[IF_THEN4795:%.*]], label [[IF_END4796:%.*]] +// SIMD-ONLY0: if.then4795: +// 
SIMD-ONLY0-NEXT: [[TMP2656:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2656]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4796]] +// SIMD-ONLY0: if.end4796: +// SIMD-ONLY0-NEXT: [[TMP2657:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2658:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4797:%.*]] = fcmp olt float [[TMP2657]], [[TMP2658]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4797]], label [[IF_THEN4799:%.*]], label [[IF_END4800:%.*]] +// SIMD-ONLY0: if.then4799: +// SIMD-ONLY0-NEXT: [[TMP2659:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2659]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4800]] +// SIMD-ONLY0: if.end4800: +// SIMD-ONLY0-NEXT: [[TMP2660:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2661:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4801:%.*]] = fcmp ogt float [[TMP2660]], [[TMP2661]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4801]], label [[COND_TRUE4803:%.*]], label [[COND_FALSE4804:%.*]] +// SIMD-ONLY0: cond.true4803: +// SIMD-ONLY0-NEXT: [[TMP2662:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4805:%.*]] +// SIMD-ONLY0: cond.false4804: +// SIMD-ONLY0-NEXT: [[TMP2663:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4805]] +// SIMD-ONLY0: cond.end4805: +// SIMD-ONLY0-NEXT: [[COND4806:%.*]] = phi float [ [[TMP2662]], [[COND_TRUE4803]] ], [ [[TMP2663]], [[COND_FALSE4804]] ] +// SIMD-ONLY0-NEXT: store float [[COND4806]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2664:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2665:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4807:%.*]] = fcmp olt float [[TMP2664]], [[TMP2665]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4807]], label [[COND_TRUE4809:%.*]], label [[COND_FALSE4810:%.*]] +// SIMD-ONLY0: cond.true4809: +// SIMD-ONLY0-NEXT: [[TMP2666:%.*]] = load float, 
ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4811:%.*]] +// SIMD-ONLY0: cond.false4810: +// SIMD-ONLY0-NEXT: [[TMP2667:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4811]] +// SIMD-ONLY0: cond.end4811: +// SIMD-ONLY0-NEXT: [[COND4812:%.*]] = phi float [ [[TMP2666]], [[COND_TRUE4809]] ], [ [[TMP2667]], [[COND_FALSE4810]] ] +// SIMD-ONLY0-NEXT: store float [[COND4812]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2668:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2669:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4813:%.*]] = fcmp ogt float [[TMP2668]], [[TMP2669]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4813]], label [[COND_TRUE4815:%.*]], label [[COND_FALSE4816:%.*]] +// SIMD-ONLY0: cond.true4815: +// SIMD-ONLY0-NEXT: [[TMP2670:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4817:%.*]] +// SIMD-ONLY0: cond.false4816: +// SIMD-ONLY0-NEXT: [[TMP2671:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4817]] +// SIMD-ONLY0: cond.end4817: +// SIMD-ONLY0-NEXT: [[COND4818:%.*]] = phi float [ [[TMP2670]], [[COND_TRUE4815]] ], [ [[TMP2671]], [[COND_FALSE4816]] ] +// SIMD-ONLY0-NEXT: store float [[COND4818]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2672:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2673:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4819:%.*]] = fcmp olt float [[TMP2672]], [[TMP2673]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4819]], label [[COND_TRUE4821:%.*]], label [[COND_FALSE4822:%.*]] +// SIMD-ONLY0: cond.true4821: +// SIMD-ONLY0-NEXT: [[TMP2674:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4823:%.*]] +// SIMD-ONLY0: cond.false4822: +// SIMD-ONLY0-NEXT: [[TMP2675:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4823]] +// SIMD-ONLY0: cond.end4823: +// SIMD-ONLY0-NEXT: [[COND4824:%.*]] = phi float [ [[TMP2674]], 
[[COND_TRUE4821]] ], [ [[TMP2675]], [[COND_FALSE4822]] ] +// SIMD-ONLY0-NEXT: store float [[COND4824]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2676:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2677:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4825:%.*]] = fcmp ogt float [[TMP2676]], [[TMP2677]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4825]], label [[IF_THEN4827:%.*]], label [[IF_END4828:%.*]] +// SIMD-ONLY0: if.then4827: +// SIMD-ONLY0-NEXT: [[TMP2678:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2678]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4828]] +// SIMD-ONLY0: if.end4828: +// SIMD-ONLY0-NEXT: [[TMP2679:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2680:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4829:%.*]] = fcmp olt float [[TMP2679]], [[TMP2680]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4829]], label [[IF_THEN4831:%.*]], label [[IF_END4832:%.*]] +// SIMD-ONLY0: if.then4831: +// SIMD-ONLY0-NEXT: [[TMP2681:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2681]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4832]] +// SIMD-ONLY0: if.end4832: +// SIMD-ONLY0-NEXT: [[TMP2682:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2683:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4833:%.*]] = fcmp ogt float [[TMP2682]], [[TMP2683]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4833]], label [[IF_THEN4835:%.*]], label [[IF_END4836:%.*]] +// SIMD-ONLY0: if.then4835: +// SIMD-ONLY0-NEXT: [[TMP2684:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2684]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4836]] +// SIMD-ONLY0: if.end4836: +// SIMD-ONLY0-NEXT: [[TMP2685:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2686:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP4837:%.*]] = fcmp olt float [[TMP2685]], 
[[TMP2686]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4837]], label [[IF_THEN4839:%.*]], label [[IF_END4840:%.*]] +// SIMD-ONLY0: if.then4839: +// SIMD-ONLY0-NEXT: [[TMP2687:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP2687]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END4840]] +// SIMD-ONLY0: if.end4840: +// SIMD-ONLY0-NEXT: [[TMP2688:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2689:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4841:%.*]] = fcmp ogt double [[TMP2688]], [[TMP2689]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4841]], label [[COND_TRUE4843:%.*]], label [[COND_FALSE4844:%.*]] +// SIMD-ONLY0: cond.true4843: +// SIMD-ONLY0-NEXT: [[TMP2690:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4845:%.*]] +// SIMD-ONLY0: cond.false4844: +// SIMD-ONLY0-NEXT: [[TMP2691:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4845]] +// SIMD-ONLY0: cond.end4845: +// SIMD-ONLY0-NEXT: [[COND4846:%.*]] = phi double [ [[TMP2690]], [[COND_TRUE4843]] ], [ [[TMP2691]], [[COND_FALSE4844]] ] +// SIMD-ONLY0-NEXT: store double [[COND4846]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2692:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2693:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4847:%.*]] = fcmp olt double [[TMP2692]], [[TMP2693]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4847]], label [[COND_TRUE4849:%.*]], label [[COND_FALSE4850:%.*]] +// SIMD-ONLY0: cond.true4849: +// SIMD-ONLY0-NEXT: [[TMP2694:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4851:%.*]] +// SIMD-ONLY0: cond.false4850: +// SIMD-ONLY0-NEXT: [[TMP2695:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4851]] +// SIMD-ONLY0: cond.end4851: +// SIMD-ONLY0-NEXT: [[COND4852:%.*]] = phi double [ [[TMP2694]], [[COND_TRUE4849]] ], [ [[TMP2695]], [[COND_FALSE4850]] ] +// SIMD-ONLY0-NEXT: store 
double [[COND4852]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2696:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2697:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4853:%.*]] = fcmp ogt double [[TMP2696]], [[TMP2697]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4853]], label [[COND_TRUE4855:%.*]], label [[COND_FALSE4856:%.*]] +// SIMD-ONLY0: cond.true4855: +// SIMD-ONLY0-NEXT: [[TMP2698:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4857:%.*]] +// SIMD-ONLY0: cond.false4856: +// SIMD-ONLY0-NEXT: [[TMP2699:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4857]] +// SIMD-ONLY0: cond.end4857: +// SIMD-ONLY0-NEXT: [[COND4858:%.*]] = phi double [ [[TMP2698]], [[COND_TRUE4855]] ], [ [[TMP2699]], [[COND_FALSE4856]] ] +// SIMD-ONLY0-NEXT: store double [[COND4858]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2700:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2701:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4859:%.*]] = fcmp olt double [[TMP2700]], [[TMP2701]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4859]], label [[COND_TRUE4861:%.*]], label [[COND_FALSE4862:%.*]] +// SIMD-ONLY0: cond.true4861: +// SIMD-ONLY0-NEXT: [[TMP2702:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4863:%.*]] +// SIMD-ONLY0: cond.false4862: +// SIMD-ONLY0-NEXT: [[TMP2703:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4863]] +// SIMD-ONLY0: cond.end4863: +// SIMD-ONLY0-NEXT: [[COND4864:%.*]] = phi double [ [[TMP2702]], [[COND_TRUE4861]] ], [ [[TMP2703]], [[COND_FALSE4862]] ] +// SIMD-ONLY0-NEXT: store double [[COND4864]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2704:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2705:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4865:%.*]] = fcmp ogt double [[TMP2704]], [[TMP2705]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP4865]], label [[IF_THEN4867:%.*]], label [[IF_END4868:%.*]] +// SIMD-ONLY0: if.then4867: +// SIMD-ONLY0-NEXT: [[TMP2706:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2706]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4868]] +// SIMD-ONLY0: if.end4868: +// SIMD-ONLY0-NEXT: [[TMP2707:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2708:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4869:%.*]] = fcmp olt double [[TMP2707]], [[TMP2708]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4869]], label [[IF_THEN4871:%.*]], label [[IF_END4872:%.*]] +// SIMD-ONLY0: if.then4871: +// SIMD-ONLY0-NEXT: [[TMP2709:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2709]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4872]] +// SIMD-ONLY0: if.end4872: +// SIMD-ONLY0-NEXT: [[TMP2710:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2711:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4873:%.*]] = fcmp ogt double [[TMP2710]], [[TMP2711]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4873]], label [[IF_THEN4875:%.*]], label [[IF_END4876:%.*]] +// SIMD-ONLY0: if.then4875: +// SIMD-ONLY0-NEXT: [[TMP2712:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2712]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4876]] +// SIMD-ONLY0: if.end4876: +// SIMD-ONLY0-NEXT: [[TMP2713:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2714:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4877:%.*]] = fcmp olt double [[TMP2713]], [[TMP2714]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4877]], label [[IF_THEN4879:%.*]], label [[IF_END4880:%.*]] +// SIMD-ONLY0: if.then4879: +// SIMD-ONLY0-NEXT: [[TMP2715:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2715]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4880]] +// SIMD-ONLY0: if.end4880: +// SIMD-ONLY0-NEXT: 
[[TMP2716:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2717:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4881:%.*]] = fcmp ogt double [[TMP2716]], [[TMP2717]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4881]], label [[COND_TRUE4883:%.*]], label [[COND_FALSE4884:%.*]] +// SIMD-ONLY0: cond.true4883: +// SIMD-ONLY0-NEXT: [[TMP2718:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4885:%.*]] +// SIMD-ONLY0: cond.false4884: +// SIMD-ONLY0-NEXT: [[TMP2719:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4885]] +// SIMD-ONLY0: cond.end4885: +// SIMD-ONLY0-NEXT: [[COND4886:%.*]] = phi double [ [[TMP2718]], [[COND_TRUE4883]] ], [ [[TMP2719]], [[COND_FALSE4884]] ] +// SIMD-ONLY0-NEXT: store double [[COND4886]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2720:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2721:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4887:%.*]] = fcmp olt double [[TMP2720]], [[TMP2721]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4887]], label [[COND_TRUE4889:%.*]], label [[COND_FALSE4890:%.*]] +// SIMD-ONLY0: cond.true4889: +// SIMD-ONLY0-NEXT: [[TMP2722:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4891:%.*]] +// SIMD-ONLY0: cond.false4890: +// SIMD-ONLY0-NEXT: [[TMP2723:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4891]] +// SIMD-ONLY0: cond.end4891: +// SIMD-ONLY0-NEXT: [[COND4892:%.*]] = phi double [ [[TMP2722]], [[COND_TRUE4889]] ], [ [[TMP2723]], [[COND_FALSE4890]] ] +// SIMD-ONLY0-NEXT: store double [[COND4892]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2724:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2725:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4893:%.*]] = fcmp ogt double [[TMP2724]], [[TMP2725]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4893]], label [[COND_TRUE4895:%.*]], label [[COND_FALSE4896:%.*]] 
+// SIMD-ONLY0: cond.true4895: +// SIMD-ONLY0-NEXT: [[TMP2726:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4897:%.*]] +// SIMD-ONLY0: cond.false4896: +// SIMD-ONLY0-NEXT: [[TMP2727:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4897]] +// SIMD-ONLY0: cond.end4897: +// SIMD-ONLY0-NEXT: [[COND4898:%.*]] = phi double [ [[TMP2726]], [[COND_TRUE4895]] ], [ [[TMP2727]], [[COND_FALSE4896]] ] +// SIMD-ONLY0-NEXT: store double [[COND4898]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2728:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2729:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4899:%.*]] = fcmp olt double [[TMP2728]], [[TMP2729]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4899]], label [[COND_TRUE4901:%.*]], label [[COND_FALSE4902:%.*]] +// SIMD-ONLY0: cond.true4901: +// SIMD-ONLY0-NEXT: [[TMP2730:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4903:%.*]] +// SIMD-ONLY0: cond.false4902: +// SIMD-ONLY0-NEXT: [[TMP2731:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4903]] +// SIMD-ONLY0: cond.end4903: +// SIMD-ONLY0-NEXT: [[COND4904:%.*]] = phi double [ [[TMP2730]], [[COND_TRUE4901]] ], [ [[TMP2731]], [[COND_FALSE4902]] ] +// SIMD-ONLY0-NEXT: store double [[COND4904]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2732:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2733:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4905:%.*]] = fcmp ogt double [[TMP2732]], [[TMP2733]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4905]], label [[IF_THEN4907:%.*]], label [[IF_END4908:%.*]] +// SIMD-ONLY0: if.then4907: +// SIMD-ONLY0-NEXT: [[TMP2734:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2734]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4908]] +// SIMD-ONLY0: if.end4908: +// SIMD-ONLY0-NEXT: [[TMP2735:%.*]] = load double, ptr [[DX]], 
align 8 +// SIMD-ONLY0-NEXT: [[TMP2736:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4909:%.*]] = fcmp olt double [[TMP2735]], [[TMP2736]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4909]], label [[IF_THEN4911:%.*]], label [[IF_END4912:%.*]] +// SIMD-ONLY0: if.then4911: +// SIMD-ONLY0-NEXT: [[TMP2737:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2737]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4912]] +// SIMD-ONLY0: if.end4912: +// SIMD-ONLY0-NEXT: [[TMP2738:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2739:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4913:%.*]] = fcmp ogt double [[TMP2738]], [[TMP2739]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4913]], label [[IF_THEN4915:%.*]], label [[IF_END4916:%.*]] +// SIMD-ONLY0: if.then4915: +// SIMD-ONLY0-NEXT: [[TMP2740:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2740]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4916]] +// SIMD-ONLY0: if.end4916: +// SIMD-ONLY0-NEXT: [[TMP2741:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2742:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4917:%.*]] = fcmp olt double [[TMP2741]], [[TMP2742]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4917]], label [[IF_THEN4919:%.*]], label [[IF_END4920:%.*]] +// SIMD-ONLY0: if.then4919: +// SIMD-ONLY0-NEXT: [[TMP2743:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2743]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4920]] +// SIMD-ONLY0: if.end4920: +// SIMD-ONLY0-NEXT: [[TMP2744:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2745:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4921:%.*]] = fcmp ogt double [[TMP2744]], [[TMP2745]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4921]], label [[COND_TRUE4923:%.*]], label [[COND_FALSE4924:%.*]] +// SIMD-ONLY0: cond.true4923: +// SIMD-ONLY0-NEXT: [[TMP2746:%.*]] = 
load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4925:%.*]] +// SIMD-ONLY0: cond.false4924: +// SIMD-ONLY0-NEXT: [[TMP2747:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4925]] +// SIMD-ONLY0: cond.end4925: +// SIMD-ONLY0-NEXT: [[COND4926:%.*]] = phi double [ [[TMP2746]], [[COND_TRUE4923]] ], [ [[TMP2747]], [[COND_FALSE4924]] ] +// SIMD-ONLY0-NEXT: store double [[COND4926]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2748:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2749:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4927:%.*]] = fcmp olt double [[TMP2748]], [[TMP2749]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4927]], label [[COND_TRUE4929:%.*]], label [[COND_FALSE4930:%.*]] +// SIMD-ONLY0: cond.true4929: +// SIMD-ONLY0-NEXT: [[TMP2750:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4931:%.*]] +// SIMD-ONLY0: cond.false4930: +// SIMD-ONLY0-NEXT: [[TMP2751:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4931]] +// SIMD-ONLY0: cond.end4931: +// SIMD-ONLY0-NEXT: [[COND4932:%.*]] = phi double [ [[TMP2750]], [[COND_TRUE4929]] ], [ [[TMP2751]], [[COND_FALSE4930]] ] +// SIMD-ONLY0-NEXT: store double [[COND4932]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2752:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2753:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4933:%.*]] = fcmp ogt double [[TMP2752]], [[TMP2753]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4933]], label [[COND_TRUE4935:%.*]], label [[COND_FALSE4936:%.*]] +// SIMD-ONLY0: cond.true4935: +// SIMD-ONLY0-NEXT: [[TMP2754:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4937:%.*]] +// SIMD-ONLY0: cond.false4936: +// SIMD-ONLY0-NEXT: [[TMP2755:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4937]] +// SIMD-ONLY0: cond.end4937: +// SIMD-ONLY0-NEXT: [[COND4938:%.*]] 
= phi double [ [[TMP2754]], [[COND_TRUE4935]] ], [ [[TMP2755]], [[COND_FALSE4936]] ] +// SIMD-ONLY0-NEXT: store double [[COND4938]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2756:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2757:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4939:%.*]] = fcmp olt double [[TMP2756]], [[TMP2757]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4939]], label [[COND_TRUE4941:%.*]], label [[COND_FALSE4942:%.*]] +// SIMD-ONLY0: cond.true4941: +// SIMD-ONLY0-NEXT: [[TMP2758:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4943:%.*]] +// SIMD-ONLY0: cond.false4942: +// SIMD-ONLY0-NEXT: [[TMP2759:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4943]] +// SIMD-ONLY0: cond.end4943: +// SIMD-ONLY0-NEXT: [[COND4944:%.*]] = phi double [ [[TMP2758]], [[COND_TRUE4941]] ], [ [[TMP2759]], [[COND_FALSE4942]] ] +// SIMD-ONLY0-NEXT: store double [[COND4944]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2760:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2761:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4945:%.*]] = fcmp ogt double [[TMP2760]], [[TMP2761]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4945]], label [[IF_THEN4947:%.*]], label [[IF_END4948:%.*]] +// SIMD-ONLY0: if.then4947: +// SIMD-ONLY0-NEXT: [[TMP2762:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2762]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4948]] +// SIMD-ONLY0: if.end4948: +// SIMD-ONLY0-NEXT: [[TMP2763:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2764:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4949:%.*]] = fcmp olt double [[TMP2763]], [[TMP2764]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4949]], label [[IF_THEN4951:%.*]], label [[IF_END4952:%.*]] +// SIMD-ONLY0: if.then4951: +// SIMD-ONLY0-NEXT: [[TMP2765:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: 
store double [[TMP2765]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4952]] +// SIMD-ONLY0: if.end4952: +// SIMD-ONLY0-NEXT: [[TMP2766:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2767:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4953:%.*]] = fcmp ogt double [[TMP2766]], [[TMP2767]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4953]], label [[IF_THEN4955:%.*]], label [[IF_END4956:%.*]] +// SIMD-ONLY0: if.then4955: +// SIMD-ONLY0-NEXT: [[TMP2768:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2768]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4956]] +// SIMD-ONLY0: if.end4956: +// SIMD-ONLY0-NEXT: [[TMP2769:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2770:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4957:%.*]] = fcmp olt double [[TMP2769]], [[TMP2770]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4957]], label [[IF_THEN4959:%.*]], label [[IF_END4960:%.*]] +// SIMD-ONLY0: if.then4959: +// SIMD-ONLY0-NEXT: [[TMP2771:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2771]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4960]] +// SIMD-ONLY0: if.end4960: +// SIMD-ONLY0-NEXT: [[TMP2772:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2773:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4961:%.*]] = fcmp ogt double [[TMP2772]], [[TMP2773]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4961]], label [[COND_TRUE4963:%.*]], label [[COND_FALSE4964:%.*]] +// SIMD-ONLY0: cond.true4963: +// SIMD-ONLY0-NEXT: [[TMP2774:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4965:%.*]] +// SIMD-ONLY0: cond.false4964: +// SIMD-ONLY0-NEXT: [[TMP2775:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4965]] +// SIMD-ONLY0: cond.end4965: +// SIMD-ONLY0-NEXT: [[COND4966:%.*]] = phi double [ [[TMP2774]], [[COND_TRUE4963]] ], [ [[TMP2775]], 
[[COND_FALSE4964]] ] +// SIMD-ONLY0-NEXT: store double [[COND4966]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2776:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2777:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4967:%.*]] = fcmp olt double [[TMP2776]], [[TMP2777]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4967]], label [[COND_TRUE4969:%.*]], label [[COND_FALSE4970:%.*]] +// SIMD-ONLY0: cond.true4969: +// SIMD-ONLY0-NEXT: [[TMP2778:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4971:%.*]] +// SIMD-ONLY0: cond.false4970: +// SIMD-ONLY0-NEXT: [[TMP2779:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4971]] +// SIMD-ONLY0: cond.end4971: +// SIMD-ONLY0-NEXT: [[COND4972:%.*]] = phi double [ [[TMP2778]], [[COND_TRUE4969]] ], [ [[TMP2779]], [[COND_FALSE4970]] ] +// SIMD-ONLY0-NEXT: store double [[COND4972]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2780:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2781:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4973:%.*]] = fcmp ogt double [[TMP2780]], [[TMP2781]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4973]], label [[COND_TRUE4975:%.*]], label [[COND_FALSE4976:%.*]] +// SIMD-ONLY0: cond.true4975: +// SIMD-ONLY0-NEXT: [[TMP2782:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4977:%.*]] +// SIMD-ONLY0: cond.false4976: +// SIMD-ONLY0-NEXT: [[TMP2783:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4977]] +// SIMD-ONLY0: cond.end4977: +// SIMD-ONLY0-NEXT: [[COND4978:%.*]] = phi double [ [[TMP2782]], [[COND_TRUE4975]] ], [ [[TMP2783]], [[COND_FALSE4976]] ] +// SIMD-ONLY0-NEXT: store double [[COND4978]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2784:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2785:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4979:%.*]] = fcmp olt double [[TMP2784]], 
[[TMP2785]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4979]], label [[COND_TRUE4981:%.*]], label [[COND_FALSE4982:%.*]] +// SIMD-ONLY0: cond.true4981: +// SIMD-ONLY0-NEXT: [[TMP2786:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4983:%.*]] +// SIMD-ONLY0: cond.false4982: +// SIMD-ONLY0-NEXT: [[TMP2787:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4983]] +// SIMD-ONLY0: cond.end4983: +// SIMD-ONLY0-NEXT: [[COND4984:%.*]] = phi double [ [[TMP2786]], [[COND_TRUE4981]] ], [ [[TMP2787]], [[COND_FALSE4982]] ] +// SIMD-ONLY0-NEXT: store double [[COND4984]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2788:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2789:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4985:%.*]] = fcmp ogt double [[TMP2788]], [[TMP2789]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4985]], label [[IF_THEN4987:%.*]], label [[IF_END4988:%.*]] +// SIMD-ONLY0: if.then4987: +// SIMD-ONLY0-NEXT: [[TMP2790:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2790]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4988]] +// SIMD-ONLY0: if.end4988: +// SIMD-ONLY0-NEXT: [[TMP2791:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2792:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4989:%.*]] = fcmp olt double [[TMP2791]], [[TMP2792]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4989]], label [[IF_THEN4991:%.*]], label [[IF_END4992:%.*]] +// SIMD-ONLY0: if.then4991: +// SIMD-ONLY0-NEXT: [[TMP2793:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2793]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4992]] +// SIMD-ONLY0: if.end4992: +// SIMD-ONLY0-NEXT: [[TMP2794:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2795:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4993:%.*]] = fcmp ogt double [[TMP2794]], [[TMP2795]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP4993]], label [[IF_THEN4995:%.*]], label [[IF_END4996:%.*]] +// SIMD-ONLY0: if.then4995: +// SIMD-ONLY0-NEXT: [[TMP2796:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2796]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4996]] +// SIMD-ONLY0: if.end4996: +// SIMD-ONLY0-NEXT: [[TMP2797:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2798:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4997:%.*]] = fcmp olt double [[TMP2797]], [[TMP2798]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4997]], label [[IF_THEN4999:%.*]], label [[IF_END5000:%.*]] +// SIMD-ONLY0: if.then4999: +// SIMD-ONLY0-NEXT: [[TMP2799:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2799]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5000]] +// SIMD-ONLY0: if.end5000: +// SIMD-ONLY0-NEXT: [[TMP2800:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2801:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5001:%.*]] = fcmp ogt double [[TMP2800]], [[TMP2801]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5001]], label [[COND_TRUE5003:%.*]], label [[COND_FALSE5004:%.*]] +// SIMD-ONLY0: cond.true5003: +// SIMD-ONLY0-NEXT: [[TMP2802:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5005:%.*]] +// SIMD-ONLY0: cond.false5004: +// SIMD-ONLY0-NEXT: [[TMP2803:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5005]] +// SIMD-ONLY0: cond.end5005: +// SIMD-ONLY0-NEXT: [[COND5006:%.*]] = phi double [ [[TMP2802]], [[COND_TRUE5003]] ], [ [[TMP2803]], [[COND_FALSE5004]] ] +// SIMD-ONLY0-NEXT: store double [[COND5006]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2804:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2805:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5007:%.*]] = fcmp olt double [[TMP2804]], [[TMP2805]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5007]], label 
[[COND_TRUE5009:%.*]], label [[COND_FALSE5010:%.*]] +// SIMD-ONLY0: cond.true5009: +// SIMD-ONLY0-NEXT: [[TMP2806:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5011:%.*]] +// SIMD-ONLY0: cond.false5010: +// SIMD-ONLY0-NEXT: [[TMP2807:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5011]] +// SIMD-ONLY0: cond.end5011: +// SIMD-ONLY0-NEXT: [[COND5012:%.*]] = phi double [ [[TMP2806]], [[COND_TRUE5009]] ], [ [[TMP2807]], [[COND_FALSE5010]] ] +// SIMD-ONLY0-NEXT: store double [[COND5012]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2808:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2809:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5013:%.*]] = fcmp ogt double [[TMP2808]], [[TMP2809]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5013]], label [[COND_TRUE5015:%.*]], label [[COND_FALSE5016:%.*]] +// SIMD-ONLY0: cond.true5015: +// SIMD-ONLY0-NEXT: [[TMP2810:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5017:%.*]] +// SIMD-ONLY0: cond.false5016: +// SIMD-ONLY0-NEXT: [[TMP2811:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5017]] +// SIMD-ONLY0: cond.end5017: +// SIMD-ONLY0-NEXT: [[COND5018:%.*]] = phi double [ [[TMP2810]], [[COND_TRUE5015]] ], [ [[TMP2811]], [[COND_FALSE5016]] ] +// SIMD-ONLY0-NEXT: store double [[COND5018]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2812:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2813:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5019:%.*]] = fcmp olt double [[TMP2812]], [[TMP2813]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5019]], label [[COND_TRUE5021:%.*]], label [[COND_FALSE5022:%.*]] +// SIMD-ONLY0: cond.true5021: +// SIMD-ONLY0-NEXT: [[TMP2814:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5023:%.*]] +// SIMD-ONLY0: cond.false5022: +// SIMD-ONLY0-NEXT: [[TMP2815:%.*]] = load double, ptr [[DX]], 
align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5023]] +// SIMD-ONLY0: cond.end5023: +// SIMD-ONLY0-NEXT: [[COND5024:%.*]] = phi double [ [[TMP2814]], [[COND_TRUE5021]] ], [ [[TMP2815]], [[COND_FALSE5022]] ] +// SIMD-ONLY0-NEXT: store double [[COND5024]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2816:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2817:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5025:%.*]] = fcmp ogt double [[TMP2816]], [[TMP2817]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5025]], label [[IF_THEN5027:%.*]], label [[IF_END5028:%.*]] +// SIMD-ONLY0: if.then5027: +// SIMD-ONLY0-NEXT: [[TMP2818:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2818]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5028]] +// SIMD-ONLY0: if.end5028: +// SIMD-ONLY0-NEXT: [[TMP2819:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2820:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5029:%.*]] = fcmp olt double [[TMP2819]], [[TMP2820]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5029]], label [[IF_THEN5031:%.*]], label [[IF_END5032:%.*]] +// SIMD-ONLY0: if.then5031: +// SIMD-ONLY0-NEXT: [[TMP2821:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2821]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5032]] +// SIMD-ONLY0: if.end5032: +// SIMD-ONLY0-NEXT: [[TMP2822:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2823:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5033:%.*]] = fcmp ogt double [[TMP2822]], [[TMP2823]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5033]], label [[IF_THEN5035:%.*]], label [[IF_END5036:%.*]] +// SIMD-ONLY0: if.then5035: +// SIMD-ONLY0-NEXT: [[TMP2824:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2824]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5036]] +// SIMD-ONLY0: if.end5036: +// SIMD-ONLY0-NEXT: [[TMP2825:%.*]] = load double, 
ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2826:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5037:%.*]] = fcmp olt double [[TMP2825]], [[TMP2826]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5037]], label [[IF_THEN5039:%.*]], label [[IF_END5040:%.*]] +// SIMD-ONLY0: if.then5039: +// SIMD-ONLY0-NEXT: [[TMP2827:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2827]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5040]] +// SIMD-ONLY0: if.end5040: +// SIMD-ONLY0-NEXT: [[TMP2828:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2829:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5041:%.*]] = fcmp ogt double [[TMP2828]], [[TMP2829]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5041]], label [[COND_TRUE5043:%.*]], label [[COND_FALSE5044:%.*]] +// SIMD-ONLY0: cond.true5043: +// SIMD-ONLY0-NEXT: [[TMP2830:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5045:%.*]] +// SIMD-ONLY0: cond.false5044: +// SIMD-ONLY0-NEXT: [[TMP2831:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5045]] +// SIMD-ONLY0: cond.end5045: +// SIMD-ONLY0-NEXT: [[COND5046:%.*]] = phi double [ [[TMP2830]], [[COND_TRUE5043]] ], [ [[TMP2831]], [[COND_FALSE5044]] ] +// SIMD-ONLY0-NEXT: store double [[COND5046]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2832:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2833:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5047:%.*]] = fcmp olt double [[TMP2832]], [[TMP2833]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5047]], label [[COND_TRUE5049:%.*]], label [[COND_FALSE5050:%.*]] +// SIMD-ONLY0: cond.true5049: +// SIMD-ONLY0-NEXT: [[TMP2834:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5051:%.*]] +// SIMD-ONLY0: cond.false5050: +// SIMD-ONLY0-NEXT: [[TMP2835:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5051]] +// SIMD-ONLY0: 
cond.end5051: +// SIMD-ONLY0-NEXT: [[COND5052:%.*]] = phi double [ [[TMP2834]], [[COND_TRUE5049]] ], [ [[TMP2835]], [[COND_FALSE5050]] ] +// SIMD-ONLY0-NEXT: store double [[COND5052]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2836:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2837:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5053:%.*]] = fcmp ogt double [[TMP2836]], [[TMP2837]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5053]], label [[COND_TRUE5055:%.*]], label [[COND_FALSE5056:%.*]] +// SIMD-ONLY0: cond.true5055: +// SIMD-ONLY0-NEXT: [[TMP2838:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5057:%.*]] +// SIMD-ONLY0: cond.false5056: +// SIMD-ONLY0-NEXT: [[TMP2839:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5057]] +// SIMD-ONLY0: cond.end5057: +// SIMD-ONLY0-NEXT: [[COND5058:%.*]] = phi double [ [[TMP2838]], [[COND_TRUE5055]] ], [ [[TMP2839]], [[COND_FALSE5056]] ] +// SIMD-ONLY0-NEXT: store double [[COND5058]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2840:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2841:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5059:%.*]] = fcmp olt double [[TMP2840]], [[TMP2841]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5059]], label [[COND_TRUE5061:%.*]], label [[COND_FALSE5062:%.*]] +// SIMD-ONLY0: cond.true5061: +// SIMD-ONLY0-NEXT: [[TMP2842:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5063:%.*]] +// SIMD-ONLY0: cond.false5062: +// SIMD-ONLY0-NEXT: [[TMP2843:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END5063]] +// SIMD-ONLY0: cond.end5063: +// SIMD-ONLY0-NEXT: [[COND5064:%.*]] = phi double [ [[TMP2842]], [[COND_TRUE5061]] ], [ [[TMP2843]], [[COND_FALSE5062]] ] +// SIMD-ONLY0-NEXT: store double [[COND5064]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2844:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP2845:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5065:%.*]] = fcmp ogt double [[TMP2844]], [[TMP2845]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5065]], label [[IF_THEN5067:%.*]], label [[IF_END5068:%.*]] +// SIMD-ONLY0: if.then5067: +// SIMD-ONLY0-NEXT: [[TMP2846:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2846]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5068]] +// SIMD-ONLY0: if.end5068: +// SIMD-ONLY0-NEXT: [[TMP2847:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2848:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5069:%.*]] = fcmp olt double [[TMP2847]], [[TMP2848]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5069]], label [[IF_THEN5071:%.*]], label [[IF_END5072:%.*]] +// SIMD-ONLY0: if.then5071: +// SIMD-ONLY0-NEXT: [[TMP2849:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2849]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5072]] +// SIMD-ONLY0: if.end5072: +// SIMD-ONLY0-NEXT: [[TMP2850:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2851:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5073:%.*]] = fcmp ogt double [[TMP2850]], [[TMP2851]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5073]], label [[IF_THEN5075:%.*]], label [[IF_END5076:%.*]] +// SIMD-ONLY0: if.then5075: +// SIMD-ONLY0-NEXT: [[TMP2852:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP2852]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5076]] +// SIMD-ONLY0: if.end5076: +// SIMD-ONLY0-NEXT: [[TMP2853:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2854:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5077:%.*]] = fcmp olt double [[TMP2853]], [[TMP2854]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5077]], label [[IF_THEN5079:%.*]], label [[IF_END5080:%.*]] +// SIMD-ONLY0: if.then5079: +// SIMD-ONLY0-NEXT: [[TMP2855:%.*]] = load double, ptr [[DE]], align 8 +// 
SIMD-ONLY0-NEXT: store double [[TMP2855]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5080]] +// SIMD-ONLY0: if.end5080: +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: @bar( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[CX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CV:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CR:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCV:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCR:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[SX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SV:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SR:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USV:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USR:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[IX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[ID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[LX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LR:%.*]] = 
alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULR:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLR:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLR:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[FX:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FV:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FE:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FD:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[DX:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DV:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DE:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DD:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP0]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i8 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// SIMD-ONLY0: if.then: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load 
i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP3]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END]] +// SIMD-ONLY0: if.end: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP4]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = sext i8 [[TMP5]] to i32 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[CONV3]], [[CONV4]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5]], label [[IF_THEN7:%.*]], label [[IF_END8:%.*]] +// SIMD-ONLY0: if.then7: +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP7]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END8]] +// SIMD-ONLY0: if.end8: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP8]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV9:%.*]] = sext i8 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV10:%.*]] = sext i8 [[TMP10]] to i32 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp slt i32 [[CONV9]], [[CONV10]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[IF_THEN13:%.*]], label [[IF_END14:%.*]] +// SIMD-ONLY0: if.then13: +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP11]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END14]] +// SIMD-ONLY0: if.end14: +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP12]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV15:%.*]] = sext i8 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i8, ptr [[CE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV16:%.*]] = sext i8 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: [[CMP17:%.*]] = icmp slt i32 [[CONV15]], [[CONV16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP17]], label [[IF_THEN19:%.*]], label [[IF_END20:%.*]] +// SIMD-ONLY0: if.then19: +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP15]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END20]] +// SIMD-ONLY0: if.end20: +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP16]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV21:%.*]] = sext i8 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = sext i8 [[TMP18]] to i32 +// SIMD-ONLY0-NEXT: [[CMP23:%.*]] = icmp eq i32 [[CONV21]], [[CONV22]] +// SIMD-ONLY0-NEXT: br i1 [[CMP23]], label [[IF_THEN25:%.*]], label [[IF_END26:%.*]] +// SIMD-ONLY0: if.then25: +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP19]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END26]] +// SIMD-ONLY0: if.end26: +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP20]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = sext i8 [[TMP21]] to i32 +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = sext i8 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: [[CMP29:%.*]] = icmp eq i32 [[CONV27]], [[CONV28]] +// SIMD-ONLY0-NEXT: br i1 [[CMP29]], label [[IF_THEN31:%.*]], label [[IF_END32:%.*]] +// SIMD-ONLY0: if.then31: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP23]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END32]] +// SIMD-ONLY0: if.end32: +// SIMD-ONLY0-NEXT: 
[[TMP24:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = sext i8 [[TMP24]] to i32 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV34:%.*]] = sext i8 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[CMP35:%.*]] = icmp sgt i32 [[CONV33]], [[CONV34]] +// SIMD-ONLY0-NEXT: br i1 [[CMP35]], label [[IF_THEN37:%.*]], label [[IF_END38:%.*]] +// SIMD-ONLY0: if.then37: +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP26]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END38]] +// SIMD-ONLY0: if.end38: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP27]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = sext i8 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = sext i8 [[TMP29]] to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[IF_THEN43:%.*]], label [[IF_END44:%.*]] +// SIMD-ONLY0: if.then43: +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP30]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END44]] +// SIMD-ONLY0: if.end44: +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP31]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV45:%.*]] = sext i8 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV46:%.*]] = sext i8 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: [[CMP47:%.*]] = icmp slt i32 [[CONV45]], [[CONV46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP47]], label [[IF_THEN49:%.*]], label [[IF_END50:%.*]] +// SIMD-ONLY0: if.then49: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load 
i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP34]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END50]] +// SIMD-ONLY0: if.end50: +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP35]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV51:%.*]] = sext i8 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV52:%.*]] = sext i8 [[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP53:%.*]] = icmp slt i32 [[CONV51]], [[CONV52]] +// SIMD-ONLY0-NEXT: br i1 [[CMP53]], label [[IF_THEN55:%.*]], label [[IF_END56:%.*]] +// SIMD-ONLY0: if.then55: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP38]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END56]] +// SIMD-ONLY0: if.end56: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP39]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV57:%.*]] = sext i8 [[TMP40]] to i32 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV58:%.*]] = sext i8 [[TMP41]] to i32 +// SIMD-ONLY0-NEXT: [[CMP59:%.*]] = icmp eq i32 [[CONV57]], [[CONV58]] +// SIMD-ONLY0-NEXT: br i1 [[CMP59]], label [[IF_THEN61:%.*]], label [[IF_END62:%.*]] +// SIMD-ONLY0: if.then61: +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP42]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END62]] +// SIMD-ONLY0: if.end62: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP43]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV63:%.*]] = sext i8 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i8, ptr 
[[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV64:%.*]] = sext i8 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: [[CMP65:%.*]] = icmp eq i32 [[CONV63]], [[CONV64]] +// SIMD-ONLY0-NEXT: br i1 [[CMP65]], label [[IF_THEN67:%.*]], label [[IF_END68:%.*]] +// SIMD-ONLY0: if.then67: +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP46]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END68]] +// SIMD-ONLY0: if.end68: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP47]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV69:%.*]] = sext i8 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV70:%.*]] = sext i8 [[TMP49]] to i32 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp eq i32 [[CONV69]], [[CONV70]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[IF_THEN73:%.*]], label [[IF_ELSE:%.*]] +// SIMD-ONLY0: if.then73: +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP50]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END74:%.*]] +// SIMD-ONLY0: if.else: +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP51]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END74]] +// SIMD-ONLY0: if.end74: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV75:%.*]] = sext i8 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV76:%.*]] = sext i8 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: [[CMP77:%.*]] = icmp eq i32 [[CONV75]], [[CONV76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP77]], label [[IF_THEN79:%.*]], label [[IF_ELSE80:%.*]] +// SIMD-ONLY0: if.then79: +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP54]], ptr [[CX]], align 1 +// 
SIMD-ONLY0-NEXT: br label [[IF_END81:%.*]] +// SIMD-ONLY0: if.else80: +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP55]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END81]] +// SIMD-ONLY0: if.end81: +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = sext i8 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = sext i8 [[TMP57]] to i32 +// SIMD-ONLY0-NEXT: [[CMP84:%.*]] = icmp eq i32 [[CONV82]], [[CONV83]] +// SIMD-ONLY0-NEXT: [[CONV85:%.*]] = zext i1 [[CMP84]] to i32 +// SIMD-ONLY0-NEXT: [[CONV86:%.*]] = trunc i32 [[CONV85]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV86]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP58]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL]], label [[IF_THEN87:%.*]], label [[IF_END88:%.*]] +// SIMD-ONLY0: if.then87: +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP59]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END88]] +// SIMD-ONLY0: if.end88: +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV89:%.*]] = sext i8 [[TMP60]] to i32 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV90:%.*]] = sext i8 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp eq i32 [[CONV89]], [[CONV90]] +// SIMD-ONLY0-NEXT: [[CONV92:%.*]] = zext i1 [[CMP91]] to i32 +// SIMD-ONLY0-NEXT: [[CONV93:%.*]] = trunc i32 [[CONV92]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV93]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL94:%.*]] = icmp ne i8 [[TMP62]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL94]], label [[IF_THEN95:%.*]], label [[IF_END96:%.*]] +// SIMD-ONLY0: if.then95: +// 
SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP63]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END96]] +// SIMD-ONLY0: if.end96: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV97:%.*]] = sext i8 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV98:%.*]] = sext i8 [[TMP65]] to i32 +// SIMD-ONLY0-NEXT: [[CMP99:%.*]] = icmp eq i32 [[CONV97]], [[CONV98]] +// SIMD-ONLY0-NEXT: [[CONV100:%.*]] = zext i1 [[CMP99]] to i32 +// SIMD-ONLY0-NEXT: [[CONV101:%.*]] = trunc i32 [[CONV100]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV101]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL102:%.*]] = icmp ne i8 [[TMP66]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL102]], label [[IF_THEN103:%.*]], label [[IF_ELSE104:%.*]] +// SIMD-ONLY0: if.then103: +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP67]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END105:%.*]] +// SIMD-ONLY0: if.else104: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP68]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END105]] +// SIMD-ONLY0: if.end105: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = sext i8 [[TMP69]] to i32 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV107:%.*]] = sext i8 [[TMP70]] to i32 +// SIMD-ONLY0-NEXT: [[CMP108:%.*]] = icmp eq i32 [[CONV106]], [[CONV107]] +// SIMD-ONLY0-NEXT: [[CONV109:%.*]] = zext i1 [[CMP108]] to i32 +// SIMD-ONLY0-NEXT: [[CONV110:%.*]] = trunc i32 [[CONV109]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV110]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: 
[[TOBOOL111:%.*]] = icmp ne i8 [[TMP71]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL111]], label [[IF_THEN112:%.*]], label [[IF_ELSE113:%.*]] +// SIMD-ONLY0: if.then112: +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP72]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END114:%.*]] +// SIMD-ONLY0: if.else113: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP73]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END114]] +// SIMD-ONLY0: if.end114: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP74]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV115:%.*]] = sext i8 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = sext i8 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: [[CMP117:%.*]] = icmp sgt i32 [[CONV115]], [[CONV116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP117]], label [[IF_THEN119:%.*]], label [[IF_END120:%.*]] +// SIMD-ONLY0: if.then119: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP77]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END120]] +// SIMD-ONLY0: if.end120: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP78]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV121:%.*]] = sext i8 [[TMP79]] to i32 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV122:%.*]] = sext i8 [[TMP80]] to i32 +// SIMD-ONLY0-NEXT: [[CMP123:%.*]] = icmp sgt i32 [[CONV121]], [[CONV122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP123]], label [[IF_THEN125:%.*]], label [[IF_END126:%.*]] +// SIMD-ONLY0: if.then125: +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i8, ptr [[CE]], align 1 +// 
SIMD-ONLY0-NEXT: store i8 [[TMP81]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END126]] +// SIMD-ONLY0: if.end126: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP82]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = sext i8 [[TMP83]] to i32 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = sext i8 [[TMP84]] to i32 +// SIMD-ONLY0-NEXT: [[CMP129:%.*]] = icmp slt i32 [[CONV127]], [[CONV128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP129]], label [[IF_THEN131:%.*]], label [[IF_END132:%.*]] +// SIMD-ONLY0: if.then131: +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP85]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END132]] +// SIMD-ONLY0: if.end132: +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP86]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV133:%.*]] = sext i8 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV134:%.*]] = sext i8 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: [[CMP135:%.*]] = icmp slt i32 [[CONV133]], [[CONV134]] +// SIMD-ONLY0-NEXT: br i1 [[CMP135]], label [[IF_THEN137:%.*]], label [[IF_END138:%.*]] +// SIMD-ONLY0: if.then137: +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP89]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END138]] +// SIMD-ONLY0: if.end138: +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP90]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = sext i8 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i8, ptr [[CE]], 
align 1 +// SIMD-ONLY0-NEXT: [[CONV140:%.*]] = sext i8 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i32 [[CONV139]], [[CONV140]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[IF_THEN143:%.*]], label [[IF_END144:%.*]] +// SIMD-ONLY0: if.then143: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP93]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END144]] +// SIMD-ONLY0: if.end144: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP94]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = sext i8 [[TMP95]] to i32 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV146:%.*]] = sext i8 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[CMP147:%.*]] = icmp eq i32 [[CONV145]], [[CONV146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP147]], label [[IF_THEN149:%.*]], label [[IF_END150:%.*]] +// SIMD-ONLY0: if.then149: +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP97]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END150]] +// SIMD-ONLY0: if.end150: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV151:%.*]] = sext i8 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV152:%.*]] = sext i8 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: [[CMP153:%.*]] = icmp sgt i32 [[CONV151]], [[CONV152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP153]], label [[IF_THEN155:%.*]], label [[IF_END156:%.*]] +// SIMD-ONLY0: if.then155: +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP100]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END156]] +// SIMD-ONLY0: if.end156: +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 
[[TMP101]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV157:%.*]] = sext i8 [[TMP102]] to i32 +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV158:%.*]] = sext i8 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: [[CMP159:%.*]] = icmp sgt i32 [[CONV157]], [[CONV158]] +// SIMD-ONLY0-NEXT: br i1 [[CMP159]], label [[IF_THEN161:%.*]], label [[IF_END162:%.*]] +// SIMD-ONLY0: if.then161: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP104]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END162]] +// SIMD-ONLY0: if.end162: +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP105]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV163:%.*]] = sext i8 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV164:%.*]] = sext i8 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: [[CMP165:%.*]] = icmp slt i32 [[CONV163]], [[CONV164]] +// SIMD-ONLY0-NEXT: br i1 [[CMP165]], label [[IF_THEN167:%.*]], label [[IF_END168:%.*]] +// SIMD-ONLY0: if.then167: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP108]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END168]] +// SIMD-ONLY0: if.end168: +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP109]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV169:%.*]] = sext i8 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = sext i8 [[TMP111]] to i32 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp slt i32 [[CONV169]], [[CONV170]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label 
[[IF_THEN173:%.*]], label [[IF_END174:%.*]] +// SIMD-ONLY0: if.then173: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP112]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END174]] +// SIMD-ONLY0: if.end174: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP113]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV175:%.*]] = sext i8 [[TMP114]] to i32 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV176:%.*]] = sext i8 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: [[CMP177:%.*]] = icmp eq i32 [[CONV175]], [[CONV176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP177]], label [[IF_THEN179:%.*]], label [[IF_END180:%.*]] +// SIMD-ONLY0: if.then179: +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP116]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END180]] +// SIMD-ONLY0: if.end180: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP117]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] = sext i8 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV182:%.*]] = sext i8 [[TMP119]] to i32 +// SIMD-ONLY0-NEXT: [[CMP183:%.*]] = icmp eq i32 [[CONV181]], [[CONV182]] +// SIMD-ONLY0-NEXT: br i1 [[CMP183]], label [[IF_THEN185:%.*]], label [[IF_END186:%.*]] +// SIMD-ONLY0: if.then185: +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP120]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END186]] +// SIMD-ONLY0: if.end186: +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP121]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: 
[[TMP122:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV187:%.*]] = sext i8 [[TMP122]] to i32 +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV188:%.*]] = sext i8 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: [[CMP189:%.*]] = icmp eq i32 [[CONV187]], [[CONV188]] +// SIMD-ONLY0-NEXT: br i1 [[CMP189]], label [[IF_THEN191:%.*]], label [[IF_ELSE192:%.*]] +// SIMD-ONLY0: if.then191: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP124]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END193:%.*]] +// SIMD-ONLY0: if.else192: +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP125]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END193]] +// SIMD-ONLY0: if.end193: +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV194:%.*]] = sext i8 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV195:%.*]] = sext i8 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP196:%.*]] = icmp eq i32 [[CONV194]], [[CONV195]] +// SIMD-ONLY0-NEXT: br i1 [[CMP196]], label [[IF_THEN198:%.*]], label [[IF_ELSE199:%.*]] +// SIMD-ONLY0: if.then198: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP128]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END200:%.*]] +// SIMD-ONLY0: if.else199: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP129]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END200]] +// SIMD-ONLY0: if.end200: +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV201:%.*]] = sext i8 [[TMP130]] to i32 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV202:%.*]] = sext i8 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP203:%.*]] = icmp eq i32 [[CONV201]], [[CONV202]] +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = zext i1 [[CMP203]] to i32 +// SIMD-ONLY0-NEXT: [[CONV205:%.*]] = trunc i32 [[CONV204]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV205]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL206:%.*]] = icmp ne i8 [[TMP132]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL206]], label [[IF_THEN207:%.*]], label [[IF_END208:%.*]] +// SIMD-ONLY0: if.then207: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP133]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END208]] +// SIMD-ONLY0: if.end208: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = sext i8 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV210:%.*]] = sext i8 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[CMP211:%.*]] = icmp eq i32 [[CONV209]], [[CONV210]] +// SIMD-ONLY0-NEXT: [[CONV212:%.*]] = zext i1 [[CMP211]] to i32 +// SIMD-ONLY0-NEXT: [[CONV213:%.*]] = trunc i32 [[CONV212]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV213]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL214:%.*]] = icmp ne i8 [[TMP136]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL214]], label [[IF_THEN215:%.*]], label [[IF_END216:%.*]] +// SIMD-ONLY0: if.then215: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP137]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END216]] +// SIMD-ONLY0: if.end216: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV217:%.*]] = sext i8 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV218:%.*]] = sext i8 [[TMP139]] to i32 +// SIMD-ONLY0-NEXT: [[CMP219:%.*]] = icmp eq 
i32 [[CONV217]], [[CONV218]] +// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = zext i1 [[CMP219]] to i32 +// SIMD-ONLY0-NEXT: [[CONV221:%.*]] = trunc i32 [[CONV220]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV221]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL222:%.*]] = icmp ne i8 [[TMP140]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL222]], label [[IF_THEN223:%.*]], label [[IF_ELSE224:%.*]] +// SIMD-ONLY0: if.then223: +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP141]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END225:%.*]] +// SIMD-ONLY0: if.else224: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP142]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END225]] +// SIMD-ONLY0: if.end225: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = sext i8 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV227:%.*]] = sext i8 [[TMP144]] to i32 +// SIMD-ONLY0-NEXT: [[CMP228:%.*]] = icmp eq i32 [[CONV226]], [[CONV227]] +// SIMD-ONLY0-NEXT: [[CONV229:%.*]] = zext i1 [[CMP228]] to i32 +// SIMD-ONLY0-NEXT: [[CONV230:%.*]] = trunc i32 [[CONV229]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV230]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL231:%.*]] = icmp ne i8 [[TMP145]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL231]], label [[IF_THEN232:%.*]], label [[IF_ELSE233:%.*]] +// SIMD-ONLY0: if.then232: +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP146]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END234:%.*]] +// SIMD-ONLY0: if.else233: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP147]], ptr [[CV]], 
align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END234]] +// SIMD-ONLY0: if.end234: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP148]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV235:%.*]] = sext i8 [[TMP149]] to i32 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV236:%.*]] = sext i8 [[TMP150]] to i32 +// SIMD-ONLY0-NEXT: [[CMP237:%.*]] = icmp sgt i32 [[CONV235]], [[CONV236]] +// SIMD-ONLY0-NEXT: br i1 [[CMP237]], label [[IF_THEN239:%.*]], label [[IF_END240:%.*]] +// SIMD-ONLY0: if.then239: +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP151]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END240]] +// SIMD-ONLY0: if.end240: +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP152]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV241:%.*]] = sext i8 [[TMP153]] to i32 +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = sext i8 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: [[CMP243:%.*]] = icmp sgt i32 [[CONV241]], [[CONV242]] +// SIMD-ONLY0-NEXT: br i1 [[CMP243]], label [[IF_THEN245:%.*]], label [[IF_END246:%.*]] +// SIMD-ONLY0: if.then245: +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP155]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END246]] +// SIMD-ONLY0: if.end246: +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP156]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = sext i8 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: 
[[CONV248:%.*]] = sext i8 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: [[CMP249:%.*]] = icmp slt i32 [[CONV247]], [[CONV248]] +// SIMD-ONLY0-NEXT: br i1 [[CMP249]], label [[IF_THEN251:%.*]], label [[IF_END252:%.*]] +// SIMD-ONLY0: if.then251: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP159]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END252]] +// SIMD-ONLY0: if.end252: +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP160]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = sext i8 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV254:%.*]] = sext i8 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[CMP255:%.*]] = icmp slt i32 [[CONV253]], [[CONV254]] +// SIMD-ONLY0-NEXT: br i1 [[CMP255]], label [[IF_THEN257:%.*]], label [[IF_END258:%.*]] +// SIMD-ONLY0: if.then257: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP163]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END258]] +// SIMD-ONLY0: if.end258: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP164]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = sext i8 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = sext i8 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp eq i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[IF_THEN263:%.*]], label [[IF_END264:%.*]] +// SIMD-ONLY0: if.then263: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP167]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END264]] +// SIMD-ONLY0: 
if.end264: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP168]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV265:%.*]] = sext i8 [[TMP169]] to i32 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = sext i8 [[TMP170]] to i32 +// SIMD-ONLY0-NEXT: [[CMP267:%.*]] = icmp eq i32 [[CONV265]], [[CONV266]] +// SIMD-ONLY0-NEXT: br i1 [[CMP267]], label [[IF_THEN269:%.*]], label [[IF_END270:%.*]] +// SIMD-ONLY0: if.then269: +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP171]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END270]] +// SIMD-ONLY0: if.end270: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV271:%.*]] = sext i8 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV272:%.*]] = sext i8 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: [[CMP273:%.*]] = icmp sgt i32 [[CONV271]], [[CONV272]] +// SIMD-ONLY0-NEXT: br i1 [[CMP273]], label [[IF_THEN275:%.*]], label [[IF_END276:%.*]] +// SIMD-ONLY0: if.then275: +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP174]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END276]] +// SIMD-ONLY0: if.end276: +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP175]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = sext i8 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV278:%.*]] = sext i8 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: [[CMP279:%.*]] = icmp sgt i32 [[CONV277]], [[CONV278]] +// SIMD-ONLY0-NEXT: br i1 [[CMP279]], label [[IF_THEN281:%.*]], label 
[[IF_END282:%.*]] +// SIMD-ONLY0: if.then281: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP178]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END282]] +// SIMD-ONLY0: if.end282: +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP179]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV283:%.*]] = sext i8 [[TMP180]] to i32 +// SIMD-ONLY0-NEXT: [[TMP181:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV284:%.*]] = sext i8 [[TMP181]] to i32 +// SIMD-ONLY0-NEXT: [[CMP285:%.*]] = icmp slt i32 [[CONV283]], [[CONV284]] +// SIMD-ONLY0-NEXT: br i1 [[CMP285]], label [[IF_THEN287:%.*]], label [[IF_END288:%.*]] +// SIMD-ONLY0: if.then287: +// SIMD-ONLY0-NEXT: [[TMP182:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP182]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END288]] +// SIMD-ONLY0: if.end288: +// SIMD-ONLY0-NEXT: [[TMP183:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP183]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP184:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV289:%.*]] = sext i8 [[TMP184]] to i32 +// SIMD-ONLY0-NEXT: [[TMP185:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV290:%.*]] = sext i8 [[TMP185]] to i32 +// SIMD-ONLY0-NEXT: [[CMP291:%.*]] = icmp slt i32 [[CONV289]], [[CONV290]] +// SIMD-ONLY0-NEXT: br i1 [[CMP291]], label [[IF_THEN293:%.*]], label [[IF_END294:%.*]] +// SIMD-ONLY0: if.then293: +// SIMD-ONLY0-NEXT: [[TMP186:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP186]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END294]] +// SIMD-ONLY0: if.end294: +// SIMD-ONLY0-NEXT: [[TMP187:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP187]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP188:%.*]] = load i8, ptr 
[[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV295:%.*]] = sext i8 [[TMP188]] to i32 +// SIMD-ONLY0-NEXT: [[TMP189:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV296:%.*]] = sext i8 [[TMP189]] to i32 +// SIMD-ONLY0-NEXT: [[CMP297:%.*]] = icmp eq i32 [[CONV295]], [[CONV296]] +// SIMD-ONLY0-NEXT: br i1 [[CMP297]], label [[IF_THEN299:%.*]], label [[IF_END300:%.*]] +// SIMD-ONLY0: if.then299: +// SIMD-ONLY0-NEXT: [[TMP190:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP190]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END300]] +// SIMD-ONLY0: if.end300: +// SIMD-ONLY0-NEXT: [[TMP191:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP191]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP192:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV301:%.*]] = sext i8 [[TMP192]] to i32 +// SIMD-ONLY0-NEXT: [[TMP193:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV302:%.*]] = sext i8 [[TMP193]] to i32 +// SIMD-ONLY0-NEXT: [[CMP303:%.*]] = icmp eq i32 [[CONV301]], [[CONV302]] +// SIMD-ONLY0-NEXT: br i1 [[CMP303]], label [[IF_THEN305:%.*]], label [[IF_END306:%.*]] +// SIMD-ONLY0: if.then305: +// SIMD-ONLY0-NEXT: [[TMP194:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP194]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END306]] +// SIMD-ONLY0: if.end306: +// SIMD-ONLY0-NEXT: [[TMP195:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP195]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP196:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV307:%.*]] = sext i8 [[TMP196]] to i32 +// SIMD-ONLY0-NEXT: [[TMP197:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV308:%.*]] = sext i8 [[TMP197]] to i32 +// SIMD-ONLY0-NEXT: [[CMP309:%.*]] = icmp eq i32 [[CONV307]], [[CONV308]] +// SIMD-ONLY0-NEXT: br i1 [[CMP309]], label [[IF_THEN311:%.*]], label [[IF_ELSE312:%.*]] +// SIMD-ONLY0: if.then311: +// SIMD-ONLY0-NEXT: 
[[TMP198:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP198]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END313:%.*]] +// SIMD-ONLY0: if.else312: +// SIMD-ONLY0-NEXT: [[TMP199:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP199]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END313]] +// SIMD-ONLY0: if.end313: +// SIMD-ONLY0-NEXT: [[TMP200:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV314:%.*]] = sext i8 [[TMP200]] to i32 +// SIMD-ONLY0-NEXT: [[TMP201:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = sext i8 [[TMP201]] to i32 +// SIMD-ONLY0-NEXT: [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP316]], label [[IF_THEN318:%.*]], label [[IF_ELSE319:%.*]] +// SIMD-ONLY0: if.then318: +// SIMD-ONLY0-NEXT: [[TMP202:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP202]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END320:%.*]] +// SIMD-ONLY0: if.else319: +// SIMD-ONLY0-NEXT: [[TMP203:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP203]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END320]] +// SIMD-ONLY0: if.end320: +// SIMD-ONLY0-NEXT: [[TMP204:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = sext i8 [[TMP204]] to i32 +// SIMD-ONLY0-NEXT: [[TMP205:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV322:%.*]] = sext i8 [[TMP205]] to i32 +// SIMD-ONLY0-NEXT: [[CMP323:%.*]] = icmp eq i32 [[CONV321]], [[CONV322]] +// SIMD-ONLY0-NEXT: [[CONV324:%.*]] = zext i1 [[CMP323]] to i32 +// SIMD-ONLY0-NEXT: [[CONV325:%.*]] = trunc i32 [[CONV324]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV325]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP206:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL326:%.*]] = icmp ne i8 [[TMP206]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL326]], label [[IF_THEN327:%.*]], label 
[[IF_END328:%.*]] +// SIMD-ONLY0: if.then327: +// SIMD-ONLY0-NEXT: [[TMP207:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP207]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END328]] +// SIMD-ONLY0: if.end328: +// SIMD-ONLY0-NEXT: [[TMP208:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV329:%.*]] = sext i8 [[TMP208]] to i32 +// SIMD-ONLY0-NEXT: [[TMP209:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV330:%.*]] = sext i8 [[TMP209]] to i32 +// SIMD-ONLY0-NEXT: [[CMP331:%.*]] = icmp eq i32 [[CONV329]], [[CONV330]] +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = zext i1 [[CMP331]] to i32 +// SIMD-ONLY0-NEXT: [[CONV333:%.*]] = trunc i32 [[CONV332]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV333]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP210:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL334:%.*]] = icmp ne i8 [[TMP210]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL334]], label [[IF_THEN335:%.*]], label [[IF_END336:%.*]] +// SIMD-ONLY0: if.then335: +// SIMD-ONLY0-NEXT: [[TMP211:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP211]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END336]] +// SIMD-ONLY0: if.end336: +// SIMD-ONLY0-NEXT: [[TMP212:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = sext i8 [[TMP212]] to i32 +// SIMD-ONLY0-NEXT: [[TMP213:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV338:%.*]] = sext i8 [[TMP213]] to i32 +// SIMD-ONLY0-NEXT: [[CMP339:%.*]] = icmp eq i32 [[CONV337]], [[CONV338]] +// SIMD-ONLY0-NEXT: [[CONV340:%.*]] = zext i1 [[CMP339]] to i32 +// SIMD-ONLY0-NEXT: [[CONV341:%.*]] = trunc i32 [[CONV340]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV341]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP214:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL342:%.*]] = icmp ne i8 [[TMP214]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL342]], label [[IF_THEN343:%.*]], label [[IF_ELSE344:%.*]] +// 
SIMD-ONLY0: if.then343: +// SIMD-ONLY0-NEXT: [[TMP215:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP215]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END345:%.*]] +// SIMD-ONLY0: if.else344: +// SIMD-ONLY0-NEXT: [[TMP216:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP216]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END345]] +// SIMD-ONLY0: if.end345: +// SIMD-ONLY0-NEXT: [[TMP217:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV346:%.*]] = sext i8 [[TMP217]] to i32 +// SIMD-ONLY0-NEXT: [[TMP218:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV347:%.*]] = sext i8 [[TMP218]] to i32 +// SIMD-ONLY0-NEXT: [[CMP348:%.*]] = icmp eq i32 [[CONV346]], [[CONV347]] +// SIMD-ONLY0-NEXT: [[CONV349:%.*]] = zext i1 [[CMP348]] to i32 +// SIMD-ONLY0-NEXT: [[CONV350:%.*]] = trunc i32 [[CONV349]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV350]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP219:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL351:%.*]] = icmp ne i8 [[TMP219]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL351]], label [[IF_THEN352:%.*]], label [[IF_ELSE353:%.*]] +// SIMD-ONLY0: if.then352: +// SIMD-ONLY0-NEXT: [[TMP220:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP220]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END354:%.*]] +// SIMD-ONLY0: if.else353: +// SIMD-ONLY0-NEXT: [[TMP221:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP221]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END354]] +// SIMD-ONLY0: if.end354: +// SIMD-ONLY0-NEXT: [[TMP222:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP222]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP223:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV355:%.*]] = sext i8 [[TMP223]] to i32 +// SIMD-ONLY0-NEXT: [[TMP224:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV356:%.*]] = sext i8 
[[TMP224]] to i32 +// SIMD-ONLY0-NEXT: [[CMP357:%.*]] = icmp sgt i32 [[CONV355]], [[CONV356]] +// SIMD-ONLY0-NEXT: br i1 [[CMP357]], label [[IF_THEN359:%.*]], label [[IF_END360:%.*]] +// SIMD-ONLY0: if.then359: +// SIMD-ONLY0-NEXT: [[TMP225:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP225]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END360]] +// SIMD-ONLY0: if.end360: +// SIMD-ONLY0-NEXT: [[TMP226:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP226]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP227:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV361:%.*]] = sext i8 [[TMP227]] to i32 +// SIMD-ONLY0-NEXT: [[TMP228:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV362:%.*]] = sext i8 [[TMP228]] to i32 +// SIMD-ONLY0-NEXT: [[CMP363:%.*]] = icmp sgt i32 [[CONV361]], [[CONV362]] +// SIMD-ONLY0-NEXT: br i1 [[CMP363]], label [[IF_THEN365:%.*]], label [[IF_END366:%.*]] +// SIMD-ONLY0: if.then365: +// SIMD-ONLY0-NEXT: [[TMP229:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP229]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END366]] +// SIMD-ONLY0: if.end366: +// SIMD-ONLY0-NEXT: [[TMP230:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP230]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP231:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV367:%.*]] = sext i8 [[TMP231]] to i32 +// SIMD-ONLY0-NEXT: [[TMP232:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = sext i8 [[TMP232]] to i32 +// SIMD-ONLY0-NEXT: [[CMP369:%.*]] = icmp slt i32 [[CONV367]], [[CONV368]] +// SIMD-ONLY0-NEXT: br i1 [[CMP369]], label [[IF_THEN371:%.*]], label [[IF_END372:%.*]] +// SIMD-ONLY0: if.then371: +// SIMD-ONLY0-NEXT: [[TMP233:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP233]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END372]] +// SIMD-ONLY0: if.end372: +// 
SIMD-ONLY0-NEXT: [[TMP234:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP234]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP235:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV373:%.*]] = sext i8 [[TMP235]] to i32 +// SIMD-ONLY0-NEXT: [[TMP236:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = sext i8 [[TMP236]] to i32 +// SIMD-ONLY0-NEXT: [[CMP375:%.*]] = icmp slt i32 [[CONV373]], [[CONV374]] +// SIMD-ONLY0-NEXT: br i1 [[CMP375]], label [[IF_THEN377:%.*]], label [[IF_END378:%.*]] +// SIMD-ONLY0: if.then377: +// SIMD-ONLY0-NEXT: [[TMP237:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP237]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END378]] +// SIMD-ONLY0: if.end378: +// SIMD-ONLY0-NEXT: [[TMP238:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP238]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP239:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = sext i8 [[TMP239]] to i32 +// SIMD-ONLY0-NEXT: [[TMP240:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV380:%.*]] = sext i8 [[TMP240]] to i32 +// SIMD-ONLY0-NEXT: [[CMP381:%.*]] = icmp eq i32 [[CONV379]], [[CONV380]] +// SIMD-ONLY0-NEXT: br i1 [[CMP381]], label [[IF_THEN383:%.*]], label [[IF_END384:%.*]] +// SIMD-ONLY0: if.then383: +// SIMD-ONLY0-NEXT: [[TMP241:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP241]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END384]] +// SIMD-ONLY0: if.end384: +// SIMD-ONLY0-NEXT: [[TMP242:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP242]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP243:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = sext i8 [[TMP243]] to i32 +// SIMD-ONLY0-NEXT: [[TMP244:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV386:%.*]] = sext i8 [[TMP244]] to i32 +// SIMD-ONLY0-NEXT: [[CMP387:%.*]] = 
icmp eq i32 [[CONV385]], [[CONV386]] +// SIMD-ONLY0-NEXT: br i1 [[CMP387]], label [[IF_THEN389:%.*]], label [[IF_END390:%.*]] +// SIMD-ONLY0: if.then389: +// SIMD-ONLY0-NEXT: [[TMP245:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP245]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END390]] +// SIMD-ONLY0: if.end390: +// SIMD-ONLY0-NEXT: [[TMP246:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV391:%.*]] = sext i8 [[TMP246]] to i32 +// SIMD-ONLY0-NEXT: [[TMP247:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV392:%.*]] = sext i8 [[TMP247]] to i32 +// SIMD-ONLY0-NEXT: [[CMP393:%.*]] = icmp sgt i32 [[CONV391]], [[CONV392]] +// SIMD-ONLY0-NEXT: br i1 [[CMP393]], label [[IF_THEN395:%.*]], label [[IF_END396:%.*]] +// SIMD-ONLY0: if.then395: +// SIMD-ONLY0-NEXT: [[TMP248:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP248]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END396]] +// SIMD-ONLY0: if.end396: +// SIMD-ONLY0-NEXT: [[TMP249:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP249]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP250:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV397:%.*]] = sext i8 [[TMP250]] to i32 +// SIMD-ONLY0-NEXT: [[TMP251:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV398:%.*]] = sext i8 [[TMP251]] to i32 +// SIMD-ONLY0-NEXT: [[CMP399:%.*]] = icmp sgt i32 [[CONV397]], [[CONV398]] +// SIMD-ONLY0-NEXT: br i1 [[CMP399]], label [[IF_THEN401:%.*]], label [[IF_END402:%.*]] +// SIMD-ONLY0: if.then401: +// SIMD-ONLY0-NEXT: [[TMP252:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP252]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END402]] +// SIMD-ONLY0: if.end402: +// SIMD-ONLY0-NEXT: [[TMP253:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP253]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP254:%.*]] = load i8, ptr [[CE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV403:%.*]] = sext i8 [[TMP254]] to i32 +// SIMD-ONLY0-NEXT: [[TMP255:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV404:%.*]] = sext i8 [[TMP255]] to i32 +// SIMD-ONLY0-NEXT: [[CMP405:%.*]] = icmp slt i32 [[CONV403]], [[CONV404]] +// SIMD-ONLY0-NEXT: br i1 [[CMP405]], label [[IF_THEN407:%.*]], label [[IF_END408:%.*]] +// SIMD-ONLY0: if.then407: +// SIMD-ONLY0-NEXT: [[TMP256:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP256]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END408]] +// SIMD-ONLY0: if.end408: +// SIMD-ONLY0-NEXT: [[TMP257:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP257]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP258:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV409:%.*]] = sext i8 [[TMP258]] to i32 +// SIMD-ONLY0-NEXT: [[TMP259:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV410:%.*]] = sext i8 [[TMP259]] to i32 +// SIMD-ONLY0-NEXT: [[CMP411:%.*]] = icmp slt i32 [[CONV409]], [[CONV410]] +// SIMD-ONLY0-NEXT: br i1 [[CMP411]], label [[IF_THEN413:%.*]], label [[IF_END414:%.*]] +// SIMD-ONLY0: if.then413: +// SIMD-ONLY0-NEXT: [[TMP260:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP260]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END414]] +// SIMD-ONLY0: if.end414: +// SIMD-ONLY0-NEXT: [[TMP261:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP261]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP262:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV415:%.*]] = sext i8 [[TMP262]] to i32 +// SIMD-ONLY0-NEXT: [[TMP263:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV416:%.*]] = sext i8 [[TMP263]] to i32 +// SIMD-ONLY0-NEXT: [[CMP417:%.*]] = icmp eq i32 [[CONV415]], [[CONV416]] +// SIMD-ONLY0-NEXT: br i1 [[CMP417]], label [[IF_THEN419:%.*]], label [[IF_END420:%.*]] +// SIMD-ONLY0: if.then419: +// SIMD-ONLY0-NEXT: [[TMP264:%.*]] = load 
i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP264]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END420]] +// SIMD-ONLY0: if.end420: +// SIMD-ONLY0-NEXT: [[TMP265:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP265]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP266:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV421:%.*]] = sext i8 [[TMP266]] to i32 +// SIMD-ONLY0-NEXT: [[TMP267:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV422:%.*]] = sext i8 [[TMP267]] to i32 +// SIMD-ONLY0-NEXT: [[CMP423:%.*]] = icmp eq i32 [[CONV421]], [[CONV422]] +// SIMD-ONLY0-NEXT: br i1 [[CMP423]], label [[IF_THEN425:%.*]], label [[IF_END426:%.*]] +// SIMD-ONLY0: if.then425: +// SIMD-ONLY0-NEXT: [[TMP268:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP268]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END426]] +// SIMD-ONLY0: if.end426: +// SIMD-ONLY0-NEXT: [[TMP269:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP269]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP270:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV427:%.*]] = sext i8 [[TMP270]] to i32 +// SIMD-ONLY0-NEXT: [[TMP271:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV428:%.*]] = sext i8 [[TMP271]] to i32 +// SIMD-ONLY0-NEXT: [[CMP429:%.*]] = icmp eq i32 [[CONV427]], [[CONV428]] +// SIMD-ONLY0-NEXT: br i1 [[CMP429]], label [[IF_THEN431:%.*]], label [[IF_ELSE432:%.*]] +// SIMD-ONLY0: if.then431: +// SIMD-ONLY0-NEXT: [[TMP272:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP272]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END433:%.*]] +// SIMD-ONLY0: if.else432: +// SIMD-ONLY0-NEXT: [[TMP273:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP273]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END433]] +// SIMD-ONLY0: if.end433: +// SIMD-ONLY0-NEXT: [[TMP274:%.*]] = load i8, ptr [[CE]], align 1 
+// SIMD-ONLY0-NEXT: [[CONV434:%.*]] = sext i8 [[TMP274]] to i32 +// SIMD-ONLY0-NEXT: [[TMP275:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV435:%.*]] = sext i8 [[TMP275]] to i32 +// SIMD-ONLY0-NEXT: [[CMP436:%.*]] = icmp eq i32 [[CONV434]], [[CONV435]] +// SIMD-ONLY0-NEXT: br i1 [[CMP436]], label [[IF_THEN438:%.*]], label [[IF_ELSE439:%.*]] +// SIMD-ONLY0: if.then438: +// SIMD-ONLY0-NEXT: [[TMP276:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP276]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END440:%.*]] +// SIMD-ONLY0: if.else439: +// SIMD-ONLY0-NEXT: [[TMP277:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP277]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END440]] +// SIMD-ONLY0: if.end440: +// SIMD-ONLY0-NEXT: [[TMP278:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV441:%.*]] = sext i8 [[TMP278]] to i32 +// SIMD-ONLY0-NEXT: [[TMP279:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV442:%.*]] = sext i8 [[TMP279]] to i32 +// SIMD-ONLY0-NEXT: [[CMP443:%.*]] = icmp eq i32 [[CONV441]], [[CONV442]] +// SIMD-ONLY0-NEXT: [[CONV444:%.*]] = zext i1 [[CMP443]] to i32 +// SIMD-ONLY0-NEXT: [[CONV445:%.*]] = trunc i32 [[CONV444]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV445]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP280:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL446:%.*]] = icmp ne i8 [[TMP280]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL446]], label [[IF_THEN447:%.*]], label [[IF_END448:%.*]] +// SIMD-ONLY0: if.then447: +// SIMD-ONLY0-NEXT: [[TMP281:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP281]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END448]] +// SIMD-ONLY0: if.end448: +// SIMD-ONLY0-NEXT: [[TMP282:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV449:%.*]] = sext i8 [[TMP282]] to i32 +// SIMD-ONLY0-NEXT: [[TMP283:%.*]] = load i8, ptr [[CX]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV450:%.*]] = sext i8 [[TMP283]] to i32 +// SIMD-ONLY0-NEXT: [[CMP451:%.*]] = icmp eq i32 [[CONV449]], [[CONV450]] +// SIMD-ONLY0-NEXT: [[CONV452:%.*]] = zext i1 [[CMP451]] to i32 +// SIMD-ONLY0-NEXT: [[CONV453:%.*]] = trunc i32 [[CONV452]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV453]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP284:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL454:%.*]] = icmp ne i8 [[TMP284]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL454]], label [[IF_THEN455:%.*]], label [[IF_END456:%.*]] +// SIMD-ONLY0: if.then455: +// SIMD-ONLY0-NEXT: [[TMP285:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP285]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END456]] +// SIMD-ONLY0: if.end456: +// SIMD-ONLY0-NEXT: [[TMP286:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV457:%.*]] = sext i8 [[TMP286]] to i32 +// SIMD-ONLY0-NEXT: [[TMP287:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV458:%.*]] = sext i8 [[TMP287]] to i32 +// SIMD-ONLY0-NEXT: [[CMP459:%.*]] = icmp eq i32 [[CONV457]], [[CONV458]] +// SIMD-ONLY0-NEXT: [[CONV460:%.*]] = zext i1 [[CMP459]] to i32 +// SIMD-ONLY0-NEXT: [[CONV461:%.*]] = trunc i32 [[CONV460]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV461]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP288:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL462:%.*]] = icmp ne i8 [[TMP288]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL462]], label [[IF_THEN463:%.*]], label [[IF_ELSE464:%.*]] +// SIMD-ONLY0: if.then463: +// SIMD-ONLY0-NEXT: [[TMP289:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP289]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END465:%.*]] +// SIMD-ONLY0: if.else464: +// SIMD-ONLY0-NEXT: [[TMP290:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP290]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END465]] +// SIMD-ONLY0: if.end465: +// SIMD-ONLY0-NEXT: 
[[TMP291:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV466:%.*]] = sext i8 [[TMP291]] to i32 +// SIMD-ONLY0-NEXT: [[TMP292:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV467:%.*]] = sext i8 [[TMP292]] to i32 +// SIMD-ONLY0-NEXT: [[CMP468:%.*]] = icmp eq i32 [[CONV466]], [[CONV467]] +// SIMD-ONLY0-NEXT: [[CONV469:%.*]] = zext i1 [[CMP468]] to i32 +// SIMD-ONLY0-NEXT: [[CONV470:%.*]] = trunc i32 [[CONV469]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV470]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP293:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL471:%.*]] = icmp ne i8 [[TMP293]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL471]], label [[IF_THEN472:%.*]], label [[IF_ELSE473:%.*]] +// SIMD-ONLY0: if.then472: +// SIMD-ONLY0-NEXT: [[TMP294:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP294]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END474:%.*]] +// SIMD-ONLY0: if.else473: +// SIMD-ONLY0-NEXT: [[TMP295:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP295]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END474]] +// SIMD-ONLY0: if.end474: +// SIMD-ONLY0-NEXT: [[TMP296:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP296]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP297:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV475:%.*]] = sext i8 [[TMP297]] to i32 +// SIMD-ONLY0-NEXT: [[TMP298:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV476:%.*]] = sext i8 [[TMP298]] to i32 +// SIMD-ONLY0-NEXT: [[CMP477:%.*]] = icmp sgt i32 [[CONV475]], [[CONV476]] +// SIMD-ONLY0-NEXT: br i1 [[CMP477]], label [[IF_THEN479:%.*]], label [[IF_END480:%.*]] +// SIMD-ONLY0: if.then479: +// SIMD-ONLY0-NEXT: [[TMP299:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP299]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END480]] +// SIMD-ONLY0: if.end480: +// SIMD-ONLY0-NEXT: [[TMP300:%.*]] = 
load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP300]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP301:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV481:%.*]] = sext i8 [[TMP301]] to i32 +// SIMD-ONLY0-NEXT: [[TMP302:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV482:%.*]] = sext i8 [[TMP302]] to i32 +// SIMD-ONLY0-NEXT: [[CMP483:%.*]] = icmp sgt i32 [[CONV481]], [[CONV482]] +// SIMD-ONLY0-NEXT: br i1 [[CMP483]], label [[IF_THEN485:%.*]], label [[IF_END486:%.*]] +// SIMD-ONLY0: if.then485: +// SIMD-ONLY0-NEXT: [[TMP303:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP303]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END486]] +// SIMD-ONLY0: if.end486: +// SIMD-ONLY0-NEXT: [[TMP304:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP304]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP305:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV487:%.*]] = sext i8 [[TMP305]] to i32 +// SIMD-ONLY0-NEXT: [[TMP306:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV488:%.*]] = sext i8 [[TMP306]] to i32 +// SIMD-ONLY0-NEXT: [[CMP489:%.*]] = icmp slt i32 [[CONV487]], [[CONV488]] +// SIMD-ONLY0-NEXT: br i1 [[CMP489]], label [[IF_THEN491:%.*]], label [[IF_END492:%.*]] +// SIMD-ONLY0: if.then491: +// SIMD-ONLY0-NEXT: [[TMP307:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP307]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END492]] +// SIMD-ONLY0: if.end492: +// SIMD-ONLY0-NEXT: [[TMP308:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP308]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP309:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV493:%.*]] = sext i8 [[TMP309]] to i32 +// SIMD-ONLY0-NEXT: [[TMP310:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV494:%.*]] = sext i8 [[TMP310]] to i32 +// SIMD-ONLY0-NEXT: [[CMP495:%.*]] = icmp slt i32 [[CONV493]], 
[[CONV494]] +// SIMD-ONLY0-NEXT: br i1 [[CMP495]], label [[IF_THEN497:%.*]], label [[IF_END498:%.*]] +// SIMD-ONLY0: if.then497: +// SIMD-ONLY0-NEXT: [[TMP311:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP311]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END498]] +// SIMD-ONLY0: if.end498: +// SIMD-ONLY0-NEXT: [[TMP312:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP312]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP313:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV499:%.*]] = sext i8 [[TMP313]] to i32 +// SIMD-ONLY0-NEXT: [[TMP314:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV500:%.*]] = sext i8 [[TMP314]] to i32 +// SIMD-ONLY0-NEXT: [[CMP501:%.*]] = icmp eq i32 [[CONV499]], [[CONV500]] +// SIMD-ONLY0-NEXT: br i1 [[CMP501]], label [[IF_THEN503:%.*]], label [[IF_END504:%.*]] +// SIMD-ONLY0: if.then503: +// SIMD-ONLY0-NEXT: [[TMP315:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP315]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END504]] +// SIMD-ONLY0: if.end504: +// SIMD-ONLY0-NEXT: [[TMP316:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP316]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP317:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV505:%.*]] = sext i8 [[TMP317]] to i32 +// SIMD-ONLY0-NEXT: [[TMP318:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV506:%.*]] = sext i8 [[TMP318]] to i32 +// SIMD-ONLY0-NEXT: [[CMP507:%.*]] = icmp eq i32 [[CONV505]], [[CONV506]] +// SIMD-ONLY0-NEXT: br i1 [[CMP507]], label [[IF_THEN509:%.*]], label [[IF_END510:%.*]] +// SIMD-ONLY0: if.then509: +// SIMD-ONLY0-NEXT: [[TMP319:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP319]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END510]] +// SIMD-ONLY0: if.end510: +// SIMD-ONLY0-NEXT: [[TMP320:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: 
[[CONV511:%.*]] = sext i8 [[TMP320]] to i32 +// SIMD-ONLY0-NEXT: [[TMP321:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV512:%.*]] = sext i8 [[TMP321]] to i32 +// SIMD-ONLY0-NEXT: [[CMP513:%.*]] = icmp sgt i32 [[CONV511]], [[CONV512]] +// SIMD-ONLY0-NEXT: br i1 [[CMP513]], label [[IF_THEN515:%.*]], label [[IF_END516:%.*]] +// SIMD-ONLY0: if.then515: +// SIMD-ONLY0-NEXT: [[TMP322:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP322]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END516]] +// SIMD-ONLY0: if.end516: +// SIMD-ONLY0-NEXT: [[TMP323:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP323]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP324:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV517:%.*]] = sext i8 [[TMP324]] to i32 +// SIMD-ONLY0-NEXT: [[TMP325:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV518:%.*]] = sext i8 [[TMP325]] to i32 +// SIMD-ONLY0-NEXT: [[CMP519:%.*]] = icmp sgt i32 [[CONV517]], [[CONV518]] +// SIMD-ONLY0-NEXT: br i1 [[CMP519]], label [[IF_THEN521:%.*]], label [[IF_END522:%.*]] +// SIMD-ONLY0: if.then521: +// SIMD-ONLY0-NEXT: [[TMP326:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP326]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END522]] +// SIMD-ONLY0: if.end522: +// SIMD-ONLY0-NEXT: [[TMP327:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP327]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP328:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV523:%.*]] = sext i8 [[TMP328]] to i32 +// SIMD-ONLY0-NEXT: [[TMP329:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV524:%.*]] = sext i8 [[TMP329]] to i32 +// SIMD-ONLY0-NEXT: [[CMP525:%.*]] = icmp slt i32 [[CONV523]], [[CONV524]] +// SIMD-ONLY0-NEXT: br i1 [[CMP525]], label [[IF_THEN527:%.*]], label [[IF_END528:%.*]] +// SIMD-ONLY0: if.then527: +// SIMD-ONLY0-NEXT: [[TMP330:%.*]] = load i8, ptr [[CE]], 
align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP330]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END528]] +// SIMD-ONLY0: if.end528: +// SIMD-ONLY0-NEXT: [[TMP331:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP331]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP332:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV529:%.*]] = sext i8 [[TMP332]] to i32 +// SIMD-ONLY0-NEXT: [[TMP333:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV530:%.*]] = sext i8 [[TMP333]] to i32 +// SIMD-ONLY0-NEXT: [[CMP531:%.*]] = icmp slt i32 [[CONV529]], [[CONV530]] +// SIMD-ONLY0-NEXT: br i1 [[CMP531]], label [[IF_THEN533:%.*]], label [[IF_END534:%.*]] +// SIMD-ONLY0: if.then533: +// SIMD-ONLY0-NEXT: [[TMP334:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP334]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END534]] +// SIMD-ONLY0: if.end534: +// SIMD-ONLY0-NEXT: [[TMP335:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP335]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP336:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV535:%.*]] = sext i8 [[TMP336]] to i32 +// SIMD-ONLY0-NEXT: [[TMP337:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV536:%.*]] = sext i8 [[TMP337]] to i32 +// SIMD-ONLY0-NEXT: [[CMP537:%.*]] = icmp eq i32 [[CONV535]], [[CONV536]] +// SIMD-ONLY0-NEXT: br i1 [[CMP537]], label [[IF_THEN539:%.*]], label [[IF_END540:%.*]] +// SIMD-ONLY0: if.then539: +// SIMD-ONLY0-NEXT: [[TMP338:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP338]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END540]] +// SIMD-ONLY0: if.end540: +// SIMD-ONLY0-NEXT: [[TMP339:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP339]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP340:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV541:%.*]] = sext i8 [[TMP340]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP341:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV542:%.*]] = sext i8 [[TMP341]] to i32 +// SIMD-ONLY0-NEXT: [[CMP543:%.*]] = icmp eq i32 [[CONV541]], [[CONV542]] +// SIMD-ONLY0-NEXT: br i1 [[CMP543]], label [[IF_THEN545:%.*]], label [[IF_END546:%.*]] +// SIMD-ONLY0: if.then545: +// SIMD-ONLY0-NEXT: [[TMP342:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP342]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END546]] +// SIMD-ONLY0: if.end546: +// SIMD-ONLY0-NEXT: [[TMP343:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP343]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP344:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV547:%.*]] = sext i8 [[TMP344]] to i32 +// SIMD-ONLY0-NEXT: [[TMP345:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV548:%.*]] = sext i8 [[TMP345]] to i32 +// SIMD-ONLY0-NEXT: [[CMP549:%.*]] = icmp eq i32 [[CONV547]], [[CONV548]] +// SIMD-ONLY0-NEXT: br i1 [[CMP549]], label [[IF_THEN551:%.*]], label [[IF_ELSE552:%.*]] +// SIMD-ONLY0: if.then551: +// SIMD-ONLY0-NEXT: [[TMP346:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP346]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END553:%.*]] +// SIMD-ONLY0: if.else552: +// SIMD-ONLY0-NEXT: [[TMP347:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP347]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END553]] +// SIMD-ONLY0: if.end553: +// SIMD-ONLY0-NEXT: [[TMP348:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV554:%.*]] = sext i8 [[TMP348]] to i32 +// SIMD-ONLY0-NEXT: [[TMP349:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV555:%.*]] = sext i8 [[TMP349]] to i32 +// SIMD-ONLY0-NEXT: [[CMP556:%.*]] = icmp eq i32 [[CONV554]], [[CONV555]] +// SIMD-ONLY0-NEXT: br i1 [[CMP556]], label [[IF_THEN558:%.*]], label [[IF_ELSE559:%.*]] +// SIMD-ONLY0: if.then558: +// SIMD-ONLY0-NEXT: [[TMP350:%.*]] = load i8, 
ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP350]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END560:%.*]] +// SIMD-ONLY0: if.else559: +// SIMD-ONLY0-NEXT: [[TMP351:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP351]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END560]] +// SIMD-ONLY0: if.end560: +// SIMD-ONLY0-NEXT: [[TMP352:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV561:%.*]] = sext i8 [[TMP352]] to i32 +// SIMD-ONLY0-NEXT: [[TMP353:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV562:%.*]] = sext i8 [[TMP353]] to i32 +// SIMD-ONLY0-NEXT: [[CMP563:%.*]] = icmp eq i32 [[CONV561]], [[CONV562]] +// SIMD-ONLY0-NEXT: [[CONV564:%.*]] = zext i1 [[CMP563]] to i32 +// SIMD-ONLY0-NEXT: [[CONV565:%.*]] = trunc i32 [[CONV564]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV565]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP354:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL566:%.*]] = icmp ne i8 [[TMP354]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL566]], label [[IF_THEN567:%.*]], label [[IF_END568:%.*]] +// SIMD-ONLY0: if.then567: +// SIMD-ONLY0-NEXT: [[TMP355:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP355]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END568]] +// SIMD-ONLY0: if.end568: +// SIMD-ONLY0-NEXT: [[TMP356:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV569:%.*]] = sext i8 [[TMP356]] to i32 +// SIMD-ONLY0-NEXT: [[TMP357:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV570:%.*]] = sext i8 [[TMP357]] to i32 +// SIMD-ONLY0-NEXT: [[CMP571:%.*]] = icmp eq i32 [[CONV569]], [[CONV570]] +// SIMD-ONLY0-NEXT: [[CONV572:%.*]] = zext i1 [[CMP571]] to i32 +// SIMD-ONLY0-NEXT: [[CONV573:%.*]] = trunc i32 [[CONV572]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV573]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP358:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL574:%.*]] = icmp ne i8 
[[TMP358]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL574]], label [[IF_THEN575:%.*]], label [[IF_END576:%.*]] +// SIMD-ONLY0: if.then575: +// SIMD-ONLY0-NEXT: [[TMP359:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP359]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END576]] +// SIMD-ONLY0: if.end576: +// SIMD-ONLY0-NEXT: [[TMP360:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV577:%.*]] = sext i8 [[TMP360]] to i32 +// SIMD-ONLY0-NEXT: [[TMP361:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV578:%.*]] = sext i8 [[TMP361]] to i32 +// SIMD-ONLY0-NEXT: [[CMP579:%.*]] = icmp eq i32 [[CONV577]], [[CONV578]] +// SIMD-ONLY0-NEXT: [[CONV580:%.*]] = zext i1 [[CMP579]] to i32 +// SIMD-ONLY0-NEXT: [[CONV581:%.*]] = trunc i32 [[CONV580]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV581]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP362:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL582:%.*]] = icmp ne i8 [[TMP362]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL582]], label [[IF_THEN583:%.*]], label [[IF_ELSE584:%.*]] +// SIMD-ONLY0: if.then583: +// SIMD-ONLY0-NEXT: [[TMP363:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP363]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END585:%.*]] +// SIMD-ONLY0: if.else584: +// SIMD-ONLY0-NEXT: [[TMP364:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP364]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END585]] +// SIMD-ONLY0: if.end585: +// SIMD-ONLY0-NEXT: [[TMP365:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV586:%.*]] = sext i8 [[TMP365]] to i32 +// SIMD-ONLY0-NEXT: [[TMP366:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV587:%.*]] = sext i8 [[TMP366]] to i32 +// SIMD-ONLY0-NEXT: [[CMP588:%.*]] = icmp eq i32 [[CONV586]], [[CONV587]] +// SIMD-ONLY0-NEXT: [[CONV589:%.*]] = zext i1 [[CMP588]] to i32 +// SIMD-ONLY0-NEXT: [[CONV590:%.*]] = trunc i32 [[CONV589]] to i8 
+// SIMD-ONLY0-NEXT: store i8 [[CONV590]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP367:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL591:%.*]] = icmp ne i8 [[TMP367]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL591]], label [[IF_THEN592:%.*]], label [[IF_ELSE593:%.*]] +// SIMD-ONLY0: if.then592: +// SIMD-ONLY0-NEXT: [[TMP368:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP368]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END594:%.*]] +// SIMD-ONLY0: if.else593: +// SIMD-ONLY0-NEXT: [[TMP369:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP369]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END594]] +// SIMD-ONLY0: if.end594: +// SIMD-ONLY0-NEXT: [[TMP370:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP370]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP371:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV595:%.*]] = sext i8 [[TMP371]] to i32 +// SIMD-ONLY0-NEXT: [[TMP372:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV596:%.*]] = sext i8 [[TMP372]] to i32 +// SIMD-ONLY0-NEXT: [[CMP597:%.*]] = icmp sgt i32 [[CONV595]], [[CONV596]] +// SIMD-ONLY0-NEXT: br i1 [[CMP597]], label [[IF_THEN599:%.*]], label [[IF_END600:%.*]] +// SIMD-ONLY0: if.then599: +// SIMD-ONLY0-NEXT: [[TMP373:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP373]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END600]] +// SIMD-ONLY0: if.end600: +// SIMD-ONLY0-NEXT: [[TMP374:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP374]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP375:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV601:%.*]] = sext i8 [[TMP375]] to i32 +// SIMD-ONLY0-NEXT: [[TMP376:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV602:%.*]] = sext i8 [[TMP376]] to i32 +// SIMD-ONLY0-NEXT: [[CMP603:%.*]] = icmp sgt i32 [[CONV601]], [[CONV602]] +// SIMD-ONLY0-NEXT: 
br i1 [[CMP603]], label [[IF_THEN605:%.*]], label [[IF_END606:%.*]] +// SIMD-ONLY0: if.then605: +// SIMD-ONLY0-NEXT: [[TMP377:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP377]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END606]] +// SIMD-ONLY0: if.end606: +// SIMD-ONLY0-NEXT: [[TMP378:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP378]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP379:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV607:%.*]] = sext i8 [[TMP379]] to i32 +// SIMD-ONLY0-NEXT: [[TMP380:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV608:%.*]] = sext i8 [[TMP380]] to i32 +// SIMD-ONLY0-NEXT: [[CMP609:%.*]] = icmp slt i32 [[CONV607]], [[CONV608]] +// SIMD-ONLY0-NEXT: br i1 [[CMP609]], label [[IF_THEN611:%.*]], label [[IF_END612:%.*]] +// SIMD-ONLY0: if.then611: +// SIMD-ONLY0-NEXT: [[TMP381:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP381]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END612]] +// SIMD-ONLY0: if.end612: +// SIMD-ONLY0-NEXT: [[TMP382:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP382]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP383:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV613:%.*]] = sext i8 [[TMP383]] to i32 +// SIMD-ONLY0-NEXT: [[TMP384:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV614:%.*]] = sext i8 [[TMP384]] to i32 +// SIMD-ONLY0-NEXT: [[CMP615:%.*]] = icmp slt i32 [[CONV613]], [[CONV614]] +// SIMD-ONLY0-NEXT: br i1 [[CMP615]], label [[IF_THEN617:%.*]], label [[IF_END618:%.*]] +// SIMD-ONLY0: if.then617: +// SIMD-ONLY0-NEXT: [[TMP385:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP385]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END618]] +// SIMD-ONLY0: if.end618: +// SIMD-ONLY0-NEXT: [[TMP386:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP386]], ptr [[CV]], align 1 
+// SIMD-ONLY0-NEXT: [[TMP387:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV619:%.*]] = sext i8 [[TMP387]] to i32 +// SIMD-ONLY0-NEXT: [[TMP388:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV620:%.*]] = sext i8 [[TMP388]] to i32 +// SIMD-ONLY0-NEXT: [[CMP621:%.*]] = icmp eq i32 [[CONV619]], [[CONV620]] +// SIMD-ONLY0-NEXT: br i1 [[CMP621]], label [[IF_THEN623:%.*]], label [[IF_END624:%.*]] +// SIMD-ONLY0: if.then623: +// SIMD-ONLY0-NEXT: [[TMP389:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP389]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END624]] +// SIMD-ONLY0: if.end624: +// SIMD-ONLY0-NEXT: [[TMP390:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP390]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP391:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV625:%.*]] = sext i8 [[TMP391]] to i32 +// SIMD-ONLY0-NEXT: [[TMP392:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV626:%.*]] = sext i8 [[TMP392]] to i32 +// SIMD-ONLY0-NEXT: [[CMP627:%.*]] = icmp eq i32 [[CONV625]], [[CONV626]] +// SIMD-ONLY0-NEXT: br i1 [[CMP627]], label [[IF_THEN629:%.*]], label [[IF_END630:%.*]] +// SIMD-ONLY0: if.then629: +// SIMD-ONLY0-NEXT: [[TMP393:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP393]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END630]] +// SIMD-ONLY0: if.end630: +// SIMD-ONLY0-NEXT: [[TMP394:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV631:%.*]] = sext i8 [[TMP394]] to i32 +// SIMD-ONLY0-NEXT: [[TMP395:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV632:%.*]] = sext i8 [[TMP395]] to i32 +// SIMD-ONLY0-NEXT: [[CMP633:%.*]] = icmp sgt i32 [[CONV631]], [[CONV632]] +// SIMD-ONLY0-NEXT: br i1 [[CMP633]], label [[IF_THEN635:%.*]], label [[IF_END636:%.*]] +// SIMD-ONLY0: if.then635: +// SIMD-ONLY0-NEXT: [[TMP396:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 
[[TMP396]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END636]] +// SIMD-ONLY0: if.end636: +// SIMD-ONLY0-NEXT: [[TMP397:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP397]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP398:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV637:%.*]] = sext i8 [[TMP398]] to i32 +// SIMD-ONLY0-NEXT: [[TMP399:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV638:%.*]] = sext i8 [[TMP399]] to i32 +// SIMD-ONLY0-NEXT: [[CMP639:%.*]] = icmp sgt i32 [[CONV637]], [[CONV638]] +// SIMD-ONLY0-NEXT: br i1 [[CMP639]], label [[IF_THEN641:%.*]], label [[IF_END642:%.*]] +// SIMD-ONLY0: if.then641: +// SIMD-ONLY0-NEXT: [[TMP400:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP400]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END642]] +// SIMD-ONLY0: if.end642: +// SIMD-ONLY0-NEXT: [[TMP401:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP401]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP402:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV643:%.*]] = sext i8 [[TMP402]] to i32 +// SIMD-ONLY0-NEXT: [[TMP403:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV644:%.*]] = sext i8 [[TMP403]] to i32 +// SIMD-ONLY0-NEXT: [[CMP645:%.*]] = icmp slt i32 [[CONV643]], [[CONV644]] +// SIMD-ONLY0-NEXT: br i1 [[CMP645]], label [[IF_THEN647:%.*]], label [[IF_END648:%.*]] +// SIMD-ONLY0: if.then647: +// SIMD-ONLY0-NEXT: [[TMP404:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP404]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END648]] +// SIMD-ONLY0: if.end648: +// SIMD-ONLY0-NEXT: [[TMP405:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP405]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP406:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV649:%.*]] = sext i8 [[TMP406]] to i32 +// SIMD-ONLY0-NEXT: [[TMP407:%.*]] = load i8, ptr [[CE]], 
align 1 +// SIMD-ONLY0-NEXT: [[CONV650:%.*]] = sext i8 [[TMP407]] to i32 +// SIMD-ONLY0-NEXT: [[CMP651:%.*]] = icmp slt i32 [[CONV649]], [[CONV650]] +// SIMD-ONLY0-NEXT: br i1 [[CMP651]], label [[IF_THEN653:%.*]], label [[IF_END654:%.*]] +// SIMD-ONLY0: if.then653: +// SIMD-ONLY0-NEXT: [[TMP408:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP408]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END654]] +// SIMD-ONLY0: if.end654: +// SIMD-ONLY0-NEXT: [[TMP409:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP409]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP410:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV655:%.*]] = sext i8 [[TMP410]] to i32 +// SIMD-ONLY0-NEXT: [[TMP411:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV656:%.*]] = sext i8 [[TMP411]] to i32 +// SIMD-ONLY0-NEXT: [[CMP657:%.*]] = icmp eq i32 [[CONV655]], [[CONV656]] +// SIMD-ONLY0-NEXT: br i1 [[CMP657]], label [[IF_THEN659:%.*]], label [[IF_END660:%.*]] +// SIMD-ONLY0: if.then659: +// SIMD-ONLY0-NEXT: [[TMP412:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP412]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END660]] +// SIMD-ONLY0: if.end660: +// SIMD-ONLY0-NEXT: [[TMP413:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP413]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP414:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV661:%.*]] = sext i8 [[TMP414]] to i32 +// SIMD-ONLY0-NEXT: [[TMP415:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV662:%.*]] = sext i8 [[TMP415]] to i32 +// SIMD-ONLY0-NEXT: [[CMP663:%.*]] = icmp eq i32 [[CONV661]], [[CONV662]] +// SIMD-ONLY0-NEXT: br i1 [[CMP663]], label [[IF_THEN665:%.*]], label [[IF_END666:%.*]] +// SIMD-ONLY0: if.then665: +// SIMD-ONLY0-NEXT: [[TMP416:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP416]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label 
[[IF_END666]] +// SIMD-ONLY0: if.end666: +// SIMD-ONLY0-NEXT: [[TMP417:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP417]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP418:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV667:%.*]] = sext i8 [[TMP418]] to i32 +// SIMD-ONLY0-NEXT: [[TMP419:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV668:%.*]] = sext i8 [[TMP419]] to i32 +// SIMD-ONLY0-NEXT: [[CMP669:%.*]] = icmp eq i32 [[CONV667]], [[CONV668]] +// SIMD-ONLY0-NEXT: br i1 [[CMP669]], label [[IF_THEN671:%.*]], label [[IF_ELSE672:%.*]] +// SIMD-ONLY0: if.then671: +// SIMD-ONLY0-NEXT: [[TMP420:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP420]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END673:%.*]] +// SIMD-ONLY0: if.else672: +// SIMD-ONLY0-NEXT: [[TMP421:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP421]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END673]] +// SIMD-ONLY0: if.end673: +// SIMD-ONLY0-NEXT: [[TMP422:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV674:%.*]] = sext i8 [[TMP422]] to i32 +// SIMD-ONLY0-NEXT: [[TMP423:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV675:%.*]] = sext i8 [[TMP423]] to i32 +// SIMD-ONLY0-NEXT: [[CMP676:%.*]] = icmp eq i32 [[CONV674]], [[CONV675]] +// SIMD-ONLY0-NEXT: br i1 [[CMP676]], label [[IF_THEN678:%.*]], label [[IF_ELSE679:%.*]] +// SIMD-ONLY0: if.then678: +// SIMD-ONLY0-NEXT: [[TMP424:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP424]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END680:%.*]] +// SIMD-ONLY0: if.else679: +// SIMD-ONLY0-NEXT: [[TMP425:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP425]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END680]] +// SIMD-ONLY0: if.end680: +// SIMD-ONLY0-NEXT: [[TMP426:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV681:%.*]] = 
sext i8 [[TMP426]] to i32 +// SIMD-ONLY0-NEXT: [[TMP427:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV682:%.*]] = sext i8 [[TMP427]] to i32 +// SIMD-ONLY0-NEXT: [[CMP683:%.*]] = icmp eq i32 [[CONV681]], [[CONV682]] +// SIMD-ONLY0-NEXT: [[CONV684:%.*]] = zext i1 [[CMP683]] to i32 +// SIMD-ONLY0-NEXT: [[CONV685:%.*]] = trunc i32 [[CONV684]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV685]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP428:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL686:%.*]] = icmp ne i8 [[TMP428]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL686]], label [[IF_THEN687:%.*]], label [[IF_END688:%.*]] +// SIMD-ONLY0: if.then687: +// SIMD-ONLY0-NEXT: [[TMP429:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP429]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END688]] +// SIMD-ONLY0: if.end688: +// SIMD-ONLY0-NEXT: [[TMP430:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV689:%.*]] = sext i8 [[TMP430]] to i32 +// SIMD-ONLY0-NEXT: [[TMP431:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV690:%.*]] = sext i8 [[TMP431]] to i32 +// SIMD-ONLY0-NEXT: [[CMP691:%.*]] = icmp eq i32 [[CONV689]], [[CONV690]] +// SIMD-ONLY0-NEXT: [[CONV692:%.*]] = zext i1 [[CMP691]] to i32 +// SIMD-ONLY0-NEXT: [[CONV693:%.*]] = trunc i32 [[CONV692]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV693]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP432:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL694:%.*]] = icmp ne i8 [[TMP432]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL694]], label [[IF_THEN695:%.*]], label [[IF_END696:%.*]] +// SIMD-ONLY0: if.then695: +// SIMD-ONLY0-NEXT: [[TMP433:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP433]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END696]] +// SIMD-ONLY0: if.end696: +// SIMD-ONLY0-NEXT: [[TMP434:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV697:%.*]] = sext i8 [[TMP434]] to i32 
+// SIMD-ONLY0-NEXT: [[TMP435:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV698:%.*]] = sext i8 [[TMP435]] to i32 +// SIMD-ONLY0-NEXT: [[CMP699:%.*]] = icmp eq i32 [[CONV697]], [[CONV698]] +// SIMD-ONLY0-NEXT: [[CONV700:%.*]] = zext i1 [[CMP699]] to i32 +// SIMD-ONLY0-NEXT: [[CONV701:%.*]] = trunc i32 [[CONV700]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV701]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP436:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL702:%.*]] = icmp ne i8 [[TMP436]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL702]], label [[IF_THEN703:%.*]], label [[IF_ELSE704:%.*]] +// SIMD-ONLY0: if.then703: +// SIMD-ONLY0-NEXT: [[TMP437:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP437]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END705:%.*]] +// SIMD-ONLY0: if.else704: +// SIMD-ONLY0-NEXT: [[TMP438:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP438]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END705]] +// SIMD-ONLY0: if.end705: +// SIMD-ONLY0-NEXT: [[TMP439:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV706:%.*]] = sext i8 [[TMP439]] to i32 +// SIMD-ONLY0-NEXT: [[TMP440:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV707:%.*]] = sext i8 [[TMP440]] to i32 +// SIMD-ONLY0-NEXT: [[CMP708:%.*]] = icmp eq i32 [[CONV706]], [[CONV707]] +// SIMD-ONLY0-NEXT: [[CONV709:%.*]] = zext i1 [[CMP708]] to i32 +// SIMD-ONLY0-NEXT: [[CONV710:%.*]] = trunc i32 [[CONV709]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV710]], ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP441:%.*]] = load i8, ptr [[CR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL711:%.*]] = icmp ne i8 [[TMP441]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL711]], label [[IF_THEN712:%.*]], label [[IF_ELSE713:%.*]] +// SIMD-ONLY0: if.then712: +// SIMD-ONLY0-NEXT: [[TMP442:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP442]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: 
br label [[IF_END714:%.*]] +// SIMD-ONLY0: if.else713: +// SIMD-ONLY0-NEXT: [[TMP443:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP443]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END714]] +// SIMD-ONLY0: if.end714: +// SIMD-ONLY0-NEXT: [[TMP444:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP444]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP445:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV715:%.*]] = zext i8 [[TMP445]] to i32 +// SIMD-ONLY0-NEXT: [[TMP446:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV716:%.*]] = zext i8 [[TMP446]] to i32 +// SIMD-ONLY0-NEXT: [[CMP717:%.*]] = icmp sgt i32 [[CONV715]], [[CONV716]] +// SIMD-ONLY0-NEXT: br i1 [[CMP717]], label [[IF_THEN719:%.*]], label [[IF_END720:%.*]] +// SIMD-ONLY0: if.then719: +// SIMD-ONLY0-NEXT: [[TMP447:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP447]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END720]] +// SIMD-ONLY0: if.end720: +// SIMD-ONLY0-NEXT: [[TMP448:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP448]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP449:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV721:%.*]] = zext i8 [[TMP449]] to i32 +// SIMD-ONLY0-NEXT: [[TMP450:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV722:%.*]] = zext i8 [[TMP450]] to i32 +// SIMD-ONLY0-NEXT: [[CMP723:%.*]] = icmp sgt i32 [[CONV721]], [[CONV722]] +// SIMD-ONLY0-NEXT: br i1 [[CMP723]], label [[IF_THEN725:%.*]], label [[IF_END726:%.*]] +// SIMD-ONLY0: if.then725: +// SIMD-ONLY0-NEXT: [[TMP451:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP451]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END726]] +// SIMD-ONLY0: if.end726: +// SIMD-ONLY0-NEXT: [[TMP452:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP452]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: 
[[TMP453:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV727:%.*]] = zext i8 [[TMP453]] to i32 +// SIMD-ONLY0-NEXT: [[TMP454:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV728:%.*]] = zext i8 [[TMP454]] to i32 +// SIMD-ONLY0-NEXT: [[CMP729:%.*]] = icmp slt i32 [[CONV727]], [[CONV728]] +// SIMD-ONLY0-NEXT: br i1 [[CMP729]], label [[IF_THEN731:%.*]], label [[IF_END732:%.*]] +// SIMD-ONLY0: if.then731: +// SIMD-ONLY0-NEXT: [[TMP455:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP455]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END732]] +// SIMD-ONLY0: if.end732: +// SIMD-ONLY0-NEXT: [[TMP456:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP456]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP457:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV733:%.*]] = zext i8 [[TMP457]] to i32 +// SIMD-ONLY0-NEXT: [[TMP458:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV734:%.*]] = zext i8 [[TMP458]] to i32 +// SIMD-ONLY0-NEXT: [[CMP735:%.*]] = icmp slt i32 [[CONV733]], [[CONV734]] +// SIMD-ONLY0-NEXT: br i1 [[CMP735]], label [[IF_THEN737:%.*]], label [[IF_END738:%.*]] +// SIMD-ONLY0: if.then737: +// SIMD-ONLY0-NEXT: [[TMP459:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP459]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END738]] +// SIMD-ONLY0: if.end738: +// SIMD-ONLY0-NEXT: [[TMP460:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP460]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP461:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV739:%.*]] = zext i8 [[TMP461]] to i32 +// SIMD-ONLY0-NEXT: [[TMP462:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV740:%.*]] = zext i8 [[TMP462]] to i32 +// SIMD-ONLY0-NEXT: [[CMP741:%.*]] = icmp eq i32 [[CONV739]], [[CONV740]] +// SIMD-ONLY0-NEXT: br i1 [[CMP741]], label [[IF_THEN743:%.*]], label [[IF_END744:%.*]] +// 
SIMD-ONLY0: if.then743: +// SIMD-ONLY0-NEXT: [[TMP463:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP463]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END744]] +// SIMD-ONLY0: if.end744: +// SIMD-ONLY0-NEXT: [[TMP464:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP464]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP465:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV745:%.*]] = zext i8 [[TMP465]] to i32 +// SIMD-ONLY0-NEXT: [[TMP466:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV746:%.*]] = zext i8 [[TMP466]] to i32 +// SIMD-ONLY0-NEXT: [[CMP747:%.*]] = icmp eq i32 [[CONV745]], [[CONV746]] +// SIMD-ONLY0-NEXT: br i1 [[CMP747]], label [[IF_THEN749:%.*]], label [[IF_END750:%.*]] +// SIMD-ONLY0: if.then749: +// SIMD-ONLY0-NEXT: [[TMP467:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP467]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END750]] +// SIMD-ONLY0: if.end750: +// SIMD-ONLY0-NEXT: [[TMP468:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV751:%.*]] = zext i8 [[TMP468]] to i32 +// SIMD-ONLY0-NEXT: [[TMP469:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV752:%.*]] = zext i8 [[TMP469]] to i32 +// SIMD-ONLY0-NEXT: [[CMP753:%.*]] = icmp sgt i32 [[CONV751]], [[CONV752]] +// SIMD-ONLY0-NEXT: br i1 [[CMP753]], label [[IF_THEN755:%.*]], label [[IF_END756:%.*]] +// SIMD-ONLY0: if.then755: +// SIMD-ONLY0-NEXT: [[TMP470:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP470]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END756]] +// SIMD-ONLY0: if.end756: +// SIMD-ONLY0-NEXT: [[TMP471:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP471]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP472:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV757:%.*]] = zext i8 [[TMP472]] to i32 +// SIMD-ONLY0-NEXT: [[TMP473:%.*]] = load i8, ptr 
[[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV758:%.*]] = zext i8 [[TMP473]] to i32 +// SIMD-ONLY0-NEXT: [[CMP759:%.*]] = icmp sgt i32 [[CONV757]], [[CONV758]] +// SIMD-ONLY0-NEXT: br i1 [[CMP759]], label [[IF_THEN761:%.*]], label [[IF_END762:%.*]] +// SIMD-ONLY0: if.then761: +// SIMD-ONLY0-NEXT: [[TMP474:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP474]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END762]] +// SIMD-ONLY0: if.end762: +// SIMD-ONLY0-NEXT: [[TMP475:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP475]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP476:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV763:%.*]] = zext i8 [[TMP476]] to i32 +// SIMD-ONLY0-NEXT: [[TMP477:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV764:%.*]] = zext i8 [[TMP477]] to i32 +// SIMD-ONLY0-NEXT: [[CMP765:%.*]] = icmp slt i32 [[CONV763]], [[CONV764]] +// SIMD-ONLY0-NEXT: br i1 [[CMP765]], label [[IF_THEN767:%.*]], label [[IF_END768:%.*]] +// SIMD-ONLY0: if.then767: +// SIMD-ONLY0-NEXT: [[TMP478:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP478]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END768]] +// SIMD-ONLY0: if.end768: +// SIMD-ONLY0-NEXT: [[TMP479:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP479]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP480:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV769:%.*]] = zext i8 [[TMP480]] to i32 +// SIMD-ONLY0-NEXT: [[TMP481:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV770:%.*]] = zext i8 [[TMP481]] to i32 +// SIMD-ONLY0-NEXT: [[CMP771:%.*]] = icmp slt i32 [[CONV769]], [[CONV770]] +// SIMD-ONLY0-NEXT: br i1 [[CMP771]], label [[IF_THEN773:%.*]], label [[IF_END774:%.*]] +// SIMD-ONLY0: if.then773: +// SIMD-ONLY0-NEXT: [[TMP482:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP482]], ptr [[UCX]], align 1 +// 
SIMD-ONLY0-NEXT: br label [[IF_END774]] +// SIMD-ONLY0: if.end774: +// SIMD-ONLY0-NEXT: [[TMP483:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP483]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP484:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV775:%.*]] = zext i8 [[TMP484]] to i32 +// SIMD-ONLY0-NEXT: [[TMP485:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV776:%.*]] = zext i8 [[TMP485]] to i32 +// SIMD-ONLY0-NEXT: [[CMP777:%.*]] = icmp eq i32 [[CONV775]], [[CONV776]] +// SIMD-ONLY0-NEXT: br i1 [[CMP777]], label [[IF_THEN779:%.*]], label [[IF_END780:%.*]] +// SIMD-ONLY0: if.then779: +// SIMD-ONLY0-NEXT: [[TMP486:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP486]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END780]] +// SIMD-ONLY0: if.end780: +// SIMD-ONLY0-NEXT: [[TMP487:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP487]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP488:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV781:%.*]] = zext i8 [[TMP488]] to i32 +// SIMD-ONLY0-NEXT: [[TMP489:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV782:%.*]] = zext i8 [[TMP489]] to i32 +// SIMD-ONLY0-NEXT: [[CMP783:%.*]] = icmp eq i32 [[CONV781]], [[CONV782]] +// SIMD-ONLY0-NEXT: br i1 [[CMP783]], label [[IF_THEN785:%.*]], label [[IF_END786:%.*]] +// SIMD-ONLY0: if.then785: +// SIMD-ONLY0-NEXT: [[TMP490:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP490]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END786]] +// SIMD-ONLY0: if.end786: +// SIMD-ONLY0-NEXT: [[TMP491:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP491]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP492:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV787:%.*]] = zext i8 [[TMP492]] to i32 +// SIMD-ONLY0-NEXT: [[TMP493:%.*]] = load i8, ptr [[UCE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV788:%.*]] = zext i8 [[TMP493]] to i32 +// SIMD-ONLY0-NEXT: [[CMP789:%.*]] = icmp eq i32 [[CONV787]], [[CONV788]] +// SIMD-ONLY0-NEXT: br i1 [[CMP789]], label [[IF_THEN791:%.*]], label [[IF_ELSE792:%.*]] +// SIMD-ONLY0: if.then791: +// SIMD-ONLY0-NEXT: [[TMP494:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP494]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END793:%.*]] +// SIMD-ONLY0: if.else792: +// SIMD-ONLY0-NEXT: [[TMP495:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP495]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END793]] +// SIMD-ONLY0: if.end793: +// SIMD-ONLY0-NEXT: [[TMP496:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV794:%.*]] = zext i8 [[TMP496]] to i32 +// SIMD-ONLY0-NEXT: [[TMP497:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV795:%.*]] = zext i8 [[TMP497]] to i32 +// SIMD-ONLY0-NEXT: [[CMP796:%.*]] = icmp eq i32 [[CONV794]], [[CONV795]] +// SIMD-ONLY0-NEXT: br i1 [[CMP796]], label [[IF_THEN798:%.*]], label [[IF_ELSE799:%.*]] +// SIMD-ONLY0: if.then798: +// SIMD-ONLY0-NEXT: [[TMP498:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP498]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END800:%.*]] +// SIMD-ONLY0: if.else799: +// SIMD-ONLY0-NEXT: [[TMP499:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP499]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END800]] +// SIMD-ONLY0: if.end800: +// SIMD-ONLY0-NEXT: [[TMP500:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV801:%.*]] = zext i8 [[TMP500]] to i32 +// SIMD-ONLY0-NEXT: [[TMP501:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV802:%.*]] = zext i8 [[TMP501]] to i32 +// SIMD-ONLY0-NEXT: [[CMP803:%.*]] = icmp eq i32 [[CONV801]], [[CONV802]] +// SIMD-ONLY0-NEXT: [[CONV804:%.*]] = zext i1 [[CMP803]] to i32 +// SIMD-ONLY0-NEXT: [[CONV805:%.*]] = trunc i32 [[CONV804]] 
to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV805]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP502:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL806:%.*]] = icmp ne i8 [[TMP502]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL806]], label [[IF_THEN807:%.*]], label [[IF_END808:%.*]] +// SIMD-ONLY0: if.then807: +// SIMD-ONLY0-NEXT: [[TMP503:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP503]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END808]] +// SIMD-ONLY0: if.end808: +// SIMD-ONLY0-NEXT: [[TMP504:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV809:%.*]] = zext i8 [[TMP504]] to i32 +// SIMD-ONLY0-NEXT: [[TMP505:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV810:%.*]] = zext i8 [[TMP505]] to i32 +// SIMD-ONLY0-NEXT: [[CMP811:%.*]] = icmp eq i32 [[CONV809]], [[CONV810]] +// SIMD-ONLY0-NEXT: [[CONV812:%.*]] = zext i1 [[CMP811]] to i32 +// SIMD-ONLY0-NEXT: [[CONV813:%.*]] = trunc i32 [[CONV812]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV813]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP506:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL814:%.*]] = icmp ne i8 [[TMP506]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL814]], label [[IF_THEN815:%.*]], label [[IF_END816:%.*]] +// SIMD-ONLY0: if.then815: +// SIMD-ONLY0-NEXT: [[TMP507:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP507]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END816]] +// SIMD-ONLY0: if.end816: +// SIMD-ONLY0-NEXT: [[TMP508:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV817:%.*]] = zext i8 [[TMP508]] to i32 +// SIMD-ONLY0-NEXT: [[TMP509:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV818:%.*]] = zext i8 [[TMP509]] to i32 +// SIMD-ONLY0-NEXT: [[CMP819:%.*]] = icmp eq i32 [[CONV817]], [[CONV818]] +// SIMD-ONLY0-NEXT: [[CONV820:%.*]] = zext i1 [[CMP819]] to i32 +// SIMD-ONLY0-NEXT: [[CONV821:%.*]] = trunc i32 [[CONV820]] to i8 +// 
SIMD-ONLY0-NEXT: store i8 [[CONV821]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP510:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL822:%.*]] = icmp ne i8 [[TMP510]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL822]], label [[IF_THEN823:%.*]], label [[IF_ELSE824:%.*]] +// SIMD-ONLY0: if.then823: +// SIMD-ONLY0-NEXT: [[TMP511:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP511]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END825:%.*]] +// SIMD-ONLY0: if.else824: +// SIMD-ONLY0-NEXT: [[TMP512:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP512]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END825]] +// SIMD-ONLY0: if.end825: +// SIMD-ONLY0-NEXT: [[TMP513:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV826:%.*]] = zext i8 [[TMP513]] to i32 +// SIMD-ONLY0-NEXT: [[TMP514:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV827:%.*]] = zext i8 [[TMP514]] to i32 +// SIMD-ONLY0-NEXT: [[CMP828:%.*]] = icmp eq i32 [[CONV826]], [[CONV827]] +// SIMD-ONLY0-NEXT: [[CONV829:%.*]] = zext i1 [[CMP828]] to i32 +// SIMD-ONLY0-NEXT: [[CONV830:%.*]] = trunc i32 [[CONV829]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV830]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP515:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL831:%.*]] = icmp ne i8 [[TMP515]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL831]], label [[IF_THEN832:%.*]], label [[IF_ELSE833:%.*]] +// SIMD-ONLY0: if.then832: +// SIMD-ONLY0-NEXT: [[TMP516:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP516]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END834:%.*]] +// SIMD-ONLY0: if.else833: +// SIMD-ONLY0-NEXT: [[TMP517:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP517]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END834]] +// SIMD-ONLY0: if.end834: +// SIMD-ONLY0-NEXT: [[TMP518:%.*]] = load i8, ptr [[UCX]], align 1 +// 
SIMD-ONLY0-NEXT: store i8 [[TMP518]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP519:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV835:%.*]] = zext i8 [[TMP519]] to i32 +// SIMD-ONLY0-NEXT: [[TMP520:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV836:%.*]] = zext i8 [[TMP520]] to i32 +// SIMD-ONLY0-NEXT: [[CMP837:%.*]] = icmp sgt i32 [[CONV835]], [[CONV836]] +// SIMD-ONLY0-NEXT: br i1 [[CMP837]], label [[IF_THEN839:%.*]], label [[IF_END840:%.*]] +// SIMD-ONLY0: if.then839: +// SIMD-ONLY0-NEXT: [[TMP521:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP521]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END840]] +// SIMD-ONLY0: if.end840: +// SIMD-ONLY0-NEXT: [[TMP522:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP522]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP523:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV841:%.*]] = zext i8 [[TMP523]] to i32 +// SIMD-ONLY0-NEXT: [[TMP524:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV842:%.*]] = zext i8 [[TMP524]] to i32 +// SIMD-ONLY0-NEXT: [[CMP843:%.*]] = icmp sgt i32 [[CONV841]], [[CONV842]] +// SIMD-ONLY0-NEXT: br i1 [[CMP843]], label [[IF_THEN845:%.*]], label [[IF_END846:%.*]] +// SIMD-ONLY0: if.then845: +// SIMD-ONLY0-NEXT: [[TMP525:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP525]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END846]] +// SIMD-ONLY0: if.end846: +// SIMD-ONLY0-NEXT: [[TMP526:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP526]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP527:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV847:%.*]] = zext i8 [[TMP527]] to i32 +// SIMD-ONLY0-NEXT: [[TMP528:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV848:%.*]] = zext i8 [[TMP528]] to i32 +// SIMD-ONLY0-NEXT: [[CMP849:%.*]] = icmp slt i32 [[CONV847]], [[CONV848]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP849]], label [[IF_THEN851:%.*]], label [[IF_END852:%.*]] +// SIMD-ONLY0: if.then851: +// SIMD-ONLY0-NEXT: [[TMP529:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP529]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END852]] +// SIMD-ONLY0: if.end852: +// SIMD-ONLY0-NEXT: [[TMP530:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP530]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP531:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV853:%.*]] = zext i8 [[TMP531]] to i32 +// SIMD-ONLY0-NEXT: [[TMP532:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV854:%.*]] = zext i8 [[TMP532]] to i32 +// SIMD-ONLY0-NEXT: [[CMP855:%.*]] = icmp slt i32 [[CONV853]], [[CONV854]] +// SIMD-ONLY0-NEXT: br i1 [[CMP855]], label [[IF_THEN857:%.*]], label [[IF_END858:%.*]] +// SIMD-ONLY0: if.then857: +// SIMD-ONLY0-NEXT: [[TMP533:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP533]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END858]] +// SIMD-ONLY0: if.end858: +// SIMD-ONLY0-NEXT: [[TMP534:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP534]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP535:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV859:%.*]] = zext i8 [[TMP535]] to i32 +// SIMD-ONLY0-NEXT: [[TMP536:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV860:%.*]] = zext i8 [[TMP536]] to i32 +// SIMD-ONLY0-NEXT: [[CMP861:%.*]] = icmp eq i32 [[CONV859]], [[CONV860]] +// SIMD-ONLY0-NEXT: br i1 [[CMP861]], label [[IF_THEN863:%.*]], label [[IF_END864:%.*]] +// SIMD-ONLY0: if.then863: +// SIMD-ONLY0-NEXT: [[TMP537:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP537]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END864]] +// SIMD-ONLY0: if.end864: +// SIMD-ONLY0-NEXT: [[TMP538:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 
[[TMP538]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP539:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV865:%.*]] = zext i8 [[TMP539]] to i32 +// SIMD-ONLY0-NEXT: [[TMP540:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV866:%.*]] = zext i8 [[TMP540]] to i32 +// SIMD-ONLY0-NEXT: [[CMP867:%.*]] = icmp eq i32 [[CONV865]], [[CONV866]] +// SIMD-ONLY0-NEXT: br i1 [[CMP867]], label [[IF_THEN869:%.*]], label [[IF_END870:%.*]] +// SIMD-ONLY0: if.then869: +// SIMD-ONLY0-NEXT: [[TMP541:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP541]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END870]] +// SIMD-ONLY0: if.end870: +// SIMD-ONLY0-NEXT: [[TMP542:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV871:%.*]] = zext i8 [[TMP542]] to i32 +// SIMD-ONLY0-NEXT: [[TMP543:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV872:%.*]] = zext i8 [[TMP543]] to i32 +// SIMD-ONLY0-NEXT: [[CMP873:%.*]] = icmp sgt i32 [[CONV871]], [[CONV872]] +// SIMD-ONLY0-NEXT: br i1 [[CMP873]], label [[IF_THEN875:%.*]], label [[IF_END876:%.*]] +// SIMD-ONLY0: if.then875: +// SIMD-ONLY0-NEXT: [[TMP544:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP544]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END876]] +// SIMD-ONLY0: if.end876: +// SIMD-ONLY0-NEXT: [[TMP545:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP545]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP546:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV877:%.*]] = zext i8 [[TMP546]] to i32 +// SIMD-ONLY0-NEXT: [[TMP547:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV878:%.*]] = zext i8 [[TMP547]] to i32 +// SIMD-ONLY0-NEXT: [[CMP879:%.*]] = icmp sgt i32 [[CONV877]], [[CONV878]] +// SIMD-ONLY0-NEXT: br i1 [[CMP879]], label [[IF_THEN881:%.*]], label [[IF_END882:%.*]] +// SIMD-ONLY0: if.then881: +// SIMD-ONLY0-NEXT: [[TMP548:%.*]] = load i8, ptr 
[[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP548]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END882]] +// SIMD-ONLY0: if.end882: +// SIMD-ONLY0-NEXT: [[TMP549:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP549]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP550:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV883:%.*]] = zext i8 [[TMP550]] to i32 +// SIMD-ONLY0-NEXT: [[TMP551:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV884:%.*]] = zext i8 [[TMP551]] to i32 +// SIMD-ONLY0-NEXT: [[CMP885:%.*]] = icmp slt i32 [[CONV883]], [[CONV884]] +// SIMD-ONLY0-NEXT: br i1 [[CMP885]], label [[IF_THEN887:%.*]], label [[IF_END888:%.*]] +// SIMD-ONLY0: if.then887: +// SIMD-ONLY0-NEXT: [[TMP552:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP552]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END888]] +// SIMD-ONLY0: if.end888: +// SIMD-ONLY0-NEXT: [[TMP553:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP553]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP554:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV889:%.*]] = zext i8 [[TMP554]] to i32 +// SIMD-ONLY0-NEXT: [[TMP555:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV890:%.*]] = zext i8 [[TMP555]] to i32 +// SIMD-ONLY0-NEXT: [[CMP891:%.*]] = icmp slt i32 [[CONV889]], [[CONV890]] +// SIMD-ONLY0-NEXT: br i1 [[CMP891]], label [[IF_THEN893:%.*]], label [[IF_END894:%.*]] +// SIMD-ONLY0: if.then893: +// SIMD-ONLY0-NEXT: [[TMP556:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP556]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END894]] +// SIMD-ONLY0: if.end894: +// SIMD-ONLY0-NEXT: [[TMP557:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP557]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP558:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV895:%.*]] = zext i8 [[TMP558]] to i32 
+// SIMD-ONLY0-NEXT: [[TMP559:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV896:%.*]] = zext i8 [[TMP559]] to i32 +// SIMD-ONLY0-NEXT: [[CMP897:%.*]] = icmp eq i32 [[CONV895]], [[CONV896]] +// SIMD-ONLY0-NEXT: br i1 [[CMP897]], label [[IF_THEN899:%.*]], label [[IF_END900:%.*]] +// SIMD-ONLY0: if.then899: +// SIMD-ONLY0-NEXT: [[TMP560:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP560]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END900]] +// SIMD-ONLY0: if.end900: +// SIMD-ONLY0-NEXT: [[TMP561:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP561]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP562:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV901:%.*]] = zext i8 [[TMP562]] to i32 +// SIMD-ONLY0-NEXT: [[TMP563:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV902:%.*]] = zext i8 [[TMP563]] to i32 +// SIMD-ONLY0-NEXT: [[CMP903:%.*]] = icmp eq i32 [[CONV901]], [[CONV902]] +// SIMD-ONLY0-NEXT: br i1 [[CMP903]], label [[IF_THEN905:%.*]], label [[IF_END906:%.*]] +// SIMD-ONLY0: if.then905: +// SIMD-ONLY0-NEXT: [[TMP564:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP564]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END906]] +// SIMD-ONLY0: if.end906: +// SIMD-ONLY0-NEXT: [[TMP565:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP565]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP566:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV907:%.*]] = zext i8 [[TMP566]] to i32 +// SIMD-ONLY0-NEXT: [[TMP567:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV908:%.*]] = zext i8 [[TMP567]] to i32 +// SIMD-ONLY0-NEXT: [[CMP909:%.*]] = icmp eq i32 [[CONV907]], [[CONV908]] +// SIMD-ONLY0-NEXT: br i1 [[CMP909]], label [[IF_THEN911:%.*]], label [[IF_ELSE912:%.*]] +// SIMD-ONLY0: if.then911: +// SIMD-ONLY0-NEXT: [[TMP568:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: 
store i8 [[TMP568]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END913:%.*]] +// SIMD-ONLY0: if.else912: +// SIMD-ONLY0-NEXT: [[TMP569:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP569]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END913]] +// SIMD-ONLY0: if.end913: +// SIMD-ONLY0-NEXT: [[TMP570:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV914:%.*]] = zext i8 [[TMP570]] to i32 +// SIMD-ONLY0-NEXT: [[TMP571:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV915:%.*]] = zext i8 [[TMP571]] to i32 +// SIMD-ONLY0-NEXT: [[CMP916:%.*]] = icmp eq i32 [[CONV914]], [[CONV915]] +// SIMD-ONLY0-NEXT: br i1 [[CMP916]], label [[IF_THEN918:%.*]], label [[IF_ELSE919:%.*]] +// SIMD-ONLY0: if.then918: +// SIMD-ONLY0-NEXT: [[TMP572:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP572]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END920:%.*]] +// SIMD-ONLY0: if.else919: +// SIMD-ONLY0-NEXT: [[TMP573:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP573]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END920]] +// SIMD-ONLY0: if.end920: +// SIMD-ONLY0-NEXT: [[TMP574:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV921:%.*]] = zext i8 [[TMP574]] to i32 +// SIMD-ONLY0-NEXT: [[TMP575:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV922:%.*]] = zext i8 [[TMP575]] to i32 +// SIMD-ONLY0-NEXT: [[CMP923:%.*]] = icmp eq i32 [[CONV921]], [[CONV922]] +// SIMD-ONLY0-NEXT: [[CONV924:%.*]] = zext i1 [[CMP923]] to i32 +// SIMD-ONLY0-NEXT: [[CONV925:%.*]] = trunc i32 [[CONV924]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV925]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP576:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL926:%.*]] = icmp ne i8 [[TMP576]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL926]], label [[IF_THEN927:%.*]], label [[IF_END928:%.*]] +// SIMD-ONLY0: if.then927: +// SIMD-ONLY0-NEXT: 
[[TMP577:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP577]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END928]] +// SIMD-ONLY0: if.end928: +// SIMD-ONLY0-NEXT: [[TMP578:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV929:%.*]] = zext i8 [[TMP578]] to i32 +// SIMD-ONLY0-NEXT: [[TMP579:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV930:%.*]] = zext i8 [[TMP579]] to i32 +// SIMD-ONLY0-NEXT: [[CMP931:%.*]] = icmp eq i32 [[CONV929]], [[CONV930]] +// SIMD-ONLY0-NEXT: [[CONV932:%.*]] = zext i1 [[CMP931]] to i32 +// SIMD-ONLY0-NEXT: [[CONV933:%.*]] = trunc i32 [[CONV932]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV933]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP580:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL934:%.*]] = icmp ne i8 [[TMP580]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL934]], label [[IF_THEN935:%.*]], label [[IF_END936:%.*]] +// SIMD-ONLY0: if.then935: +// SIMD-ONLY0-NEXT: [[TMP581:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP581]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END936]] +// SIMD-ONLY0: if.end936: +// SIMD-ONLY0-NEXT: [[TMP582:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV937:%.*]] = zext i8 [[TMP582]] to i32 +// SIMD-ONLY0-NEXT: [[TMP583:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV938:%.*]] = zext i8 [[TMP583]] to i32 +// SIMD-ONLY0-NEXT: [[CMP939:%.*]] = icmp eq i32 [[CONV937]], [[CONV938]] +// SIMD-ONLY0-NEXT: [[CONV940:%.*]] = zext i1 [[CMP939]] to i32 +// SIMD-ONLY0-NEXT: [[CONV941:%.*]] = trunc i32 [[CONV940]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV941]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP584:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL942:%.*]] = icmp ne i8 [[TMP584]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL942]], label [[IF_THEN943:%.*]], label [[IF_ELSE944:%.*]] +// SIMD-ONLY0: if.then943: +// SIMD-ONLY0-NEXT: 
[[TMP585:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP585]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END945:%.*]] +// SIMD-ONLY0: if.else944: +// SIMD-ONLY0-NEXT: [[TMP586:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP586]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END945]] +// SIMD-ONLY0: if.end945: +// SIMD-ONLY0-NEXT: [[TMP587:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV946:%.*]] = zext i8 [[TMP587]] to i32 +// SIMD-ONLY0-NEXT: [[TMP588:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV947:%.*]] = zext i8 [[TMP588]] to i32 +// SIMD-ONLY0-NEXT: [[CMP948:%.*]] = icmp eq i32 [[CONV946]], [[CONV947]] +// SIMD-ONLY0-NEXT: [[CONV949:%.*]] = zext i1 [[CMP948]] to i32 +// SIMD-ONLY0-NEXT: [[CONV950:%.*]] = trunc i32 [[CONV949]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV950]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP589:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL951:%.*]] = icmp ne i8 [[TMP589]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL951]], label [[IF_THEN952:%.*]], label [[IF_ELSE953:%.*]] +// SIMD-ONLY0: if.then952: +// SIMD-ONLY0-NEXT: [[TMP590:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP590]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END954:%.*]] +// SIMD-ONLY0: if.else953: +// SIMD-ONLY0-NEXT: [[TMP591:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP591]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END954]] +// SIMD-ONLY0: if.end954: +// SIMD-ONLY0-NEXT: [[TMP592:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP592]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP593:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV955:%.*]] = zext i8 [[TMP593]] to i32 +// SIMD-ONLY0-NEXT: [[TMP594:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV956:%.*]] = zext i8 [[TMP594]] to i32 +// 
SIMD-ONLY0-NEXT: [[CMP957:%.*]] = icmp sgt i32 [[CONV955]], [[CONV956]] +// SIMD-ONLY0-NEXT: br i1 [[CMP957]], label [[IF_THEN959:%.*]], label [[IF_END960:%.*]] +// SIMD-ONLY0: if.then959: +// SIMD-ONLY0-NEXT: [[TMP595:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP595]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END960]] +// SIMD-ONLY0: if.end960: +// SIMD-ONLY0-NEXT: [[TMP596:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP596]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP597:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV961:%.*]] = zext i8 [[TMP597]] to i32 +// SIMD-ONLY0-NEXT: [[TMP598:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV962:%.*]] = zext i8 [[TMP598]] to i32 +// SIMD-ONLY0-NEXT: [[CMP963:%.*]] = icmp sgt i32 [[CONV961]], [[CONV962]] +// SIMD-ONLY0-NEXT: br i1 [[CMP963]], label [[IF_THEN965:%.*]], label [[IF_END966:%.*]] +// SIMD-ONLY0: if.then965: +// SIMD-ONLY0-NEXT: [[TMP599:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP599]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END966]] +// SIMD-ONLY0: if.end966: +// SIMD-ONLY0-NEXT: [[TMP600:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP600]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP601:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV967:%.*]] = zext i8 [[TMP601]] to i32 +// SIMD-ONLY0-NEXT: [[TMP602:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV968:%.*]] = zext i8 [[TMP602]] to i32 +// SIMD-ONLY0-NEXT: [[CMP969:%.*]] = icmp slt i32 [[CONV967]], [[CONV968]] +// SIMD-ONLY0-NEXT: br i1 [[CMP969]], label [[IF_THEN971:%.*]], label [[IF_END972:%.*]] +// SIMD-ONLY0: if.then971: +// SIMD-ONLY0-NEXT: [[TMP603:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP603]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END972]] +// SIMD-ONLY0: if.end972: +// SIMD-ONLY0-NEXT: 
[[TMP604:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP604]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP605:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV973:%.*]] = zext i8 [[TMP605]] to i32 +// SIMD-ONLY0-NEXT: [[TMP606:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV974:%.*]] = zext i8 [[TMP606]] to i32 +// SIMD-ONLY0-NEXT: [[CMP975:%.*]] = icmp slt i32 [[CONV973]], [[CONV974]] +// SIMD-ONLY0-NEXT: br i1 [[CMP975]], label [[IF_THEN977:%.*]], label [[IF_END978:%.*]] +// SIMD-ONLY0: if.then977: +// SIMD-ONLY0-NEXT: [[TMP607:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP607]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END978]] +// SIMD-ONLY0: if.end978: +// SIMD-ONLY0-NEXT: [[TMP608:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP608]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP609:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV979:%.*]] = zext i8 [[TMP609]] to i32 +// SIMD-ONLY0-NEXT: [[TMP610:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV980:%.*]] = zext i8 [[TMP610]] to i32 +// SIMD-ONLY0-NEXT: [[CMP981:%.*]] = icmp eq i32 [[CONV979]], [[CONV980]] +// SIMD-ONLY0-NEXT: br i1 [[CMP981]], label [[IF_THEN983:%.*]], label [[IF_END984:%.*]] +// SIMD-ONLY0: if.then983: +// SIMD-ONLY0-NEXT: [[TMP611:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP611]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END984]] +// SIMD-ONLY0: if.end984: +// SIMD-ONLY0-NEXT: [[TMP612:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP612]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP613:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV985:%.*]] = zext i8 [[TMP613]] to i32 +// SIMD-ONLY0-NEXT: [[TMP614:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV986:%.*]] = zext i8 [[TMP614]] to i32 +// SIMD-ONLY0-NEXT: [[CMP987:%.*]] = 
icmp eq i32 [[CONV985]], [[CONV986]] +// SIMD-ONLY0-NEXT: br i1 [[CMP987]], label [[IF_THEN989:%.*]], label [[IF_END990:%.*]] +// SIMD-ONLY0: if.then989: +// SIMD-ONLY0-NEXT: [[TMP615:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP615]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END990]] +// SIMD-ONLY0: if.end990: +// SIMD-ONLY0-NEXT: [[TMP616:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV991:%.*]] = zext i8 [[TMP616]] to i32 +// SIMD-ONLY0-NEXT: [[TMP617:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV992:%.*]] = zext i8 [[TMP617]] to i32 +// SIMD-ONLY0-NEXT: [[CMP993:%.*]] = icmp sgt i32 [[CONV991]], [[CONV992]] +// SIMD-ONLY0-NEXT: br i1 [[CMP993]], label [[IF_THEN995:%.*]], label [[IF_END996:%.*]] +// SIMD-ONLY0: if.then995: +// SIMD-ONLY0-NEXT: [[TMP618:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP618]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END996]] +// SIMD-ONLY0: if.end996: +// SIMD-ONLY0-NEXT: [[TMP619:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP619]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP620:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV997:%.*]] = zext i8 [[TMP620]] to i32 +// SIMD-ONLY0-NEXT: [[TMP621:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV998:%.*]] = zext i8 [[TMP621]] to i32 +// SIMD-ONLY0-NEXT: [[CMP999:%.*]] = icmp sgt i32 [[CONV997]], [[CONV998]] +// SIMD-ONLY0-NEXT: br i1 [[CMP999]], label [[IF_THEN1001:%.*]], label [[IF_END1002:%.*]] +// SIMD-ONLY0: if.then1001: +// SIMD-ONLY0-NEXT: [[TMP622:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP622]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1002]] +// SIMD-ONLY0: if.end1002: +// SIMD-ONLY0-NEXT: [[TMP623:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP623]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP624:%.*]] = load i8, ptr 
[[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1003:%.*]] = zext i8 [[TMP624]] to i32 +// SIMD-ONLY0-NEXT: [[TMP625:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1004:%.*]] = zext i8 [[TMP625]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1005:%.*]] = icmp slt i32 [[CONV1003]], [[CONV1004]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1005]], label [[IF_THEN1007:%.*]], label [[IF_END1008:%.*]] +// SIMD-ONLY0: if.then1007: +// SIMD-ONLY0-NEXT: [[TMP626:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP626]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1008]] +// SIMD-ONLY0: if.end1008: +// SIMD-ONLY0-NEXT: [[TMP627:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP627]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP628:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1009:%.*]] = zext i8 [[TMP628]] to i32 +// SIMD-ONLY0-NEXT: [[TMP629:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1010:%.*]] = zext i8 [[TMP629]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1011:%.*]] = icmp slt i32 [[CONV1009]], [[CONV1010]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1011]], label [[IF_THEN1013:%.*]], label [[IF_END1014:%.*]] +// SIMD-ONLY0: if.then1013: +// SIMD-ONLY0-NEXT: [[TMP630:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP630]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1014]] +// SIMD-ONLY0: if.end1014: +// SIMD-ONLY0-NEXT: [[TMP631:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP631]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP632:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1015:%.*]] = zext i8 [[TMP632]] to i32 +// SIMD-ONLY0-NEXT: [[TMP633:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1016:%.*]] = zext i8 [[TMP633]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1017:%.*]] = icmp eq i32 [[CONV1015]], [[CONV1016]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1017]], label [[IF_THEN1019:%.*]], label [[IF_END1020:%.*]] +// 
SIMD-ONLY0: if.then1019: +// SIMD-ONLY0-NEXT: [[TMP634:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP634]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1020]] +// SIMD-ONLY0: if.end1020: +// SIMD-ONLY0-NEXT: [[TMP635:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP635]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP636:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1021:%.*]] = zext i8 [[TMP636]] to i32 +// SIMD-ONLY0-NEXT: [[TMP637:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1022:%.*]] = zext i8 [[TMP637]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1023:%.*]] = icmp eq i32 [[CONV1021]], [[CONV1022]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1023]], label [[IF_THEN1025:%.*]], label [[IF_END1026:%.*]] +// SIMD-ONLY0: if.then1025: +// SIMD-ONLY0-NEXT: [[TMP638:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP638]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1026]] +// SIMD-ONLY0: if.end1026: +// SIMD-ONLY0-NEXT: [[TMP639:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP639]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP640:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1027:%.*]] = zext i8 [[TMP640]] to i32 +// SIMD-ONLY0-NEXT: [[TMP641:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1028:%.*]] = zext i8 [[TMP641]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1029:%.*]] = icmp eq i32 [[CONV1027]], [[CONV1028]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1029]], label [[IF_THEN1031:%.*]], label [[IF_ELSE1032:%.*]] +// SIMD-ONLY0: if.then1031: +// SIMD-ONLY0-NEXT: [[TMP642:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP642]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1033:%.*]] +// SIMD-ONLY0: if.else1032: +// SIMD-ONLY0-NEXT: [[TMP643:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP643]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label 
[[IF_END1033]] +// SIMD-ONLY0: if.end1033: +// SIMD-ONLY0-NEXT: [[TMP644:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1034:%.*]] = zext i8 [[TMP644]] to i32 +// SIMD-ONLY0-NEXT: [[TMP645:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1035:%.*]] = zext i8 [[TMP645]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1036:%.*]] = icmp eq i32 [[CONV1034]], [[CONV1035]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1036]], label [[IF_THEN1038:%.*]], label [[IF_ELSE1039:%.*]] +// SIMD-ONLY0: if.then1038: +// SIMD-ONLY0-NEXT: [[TMP646:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP646]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1040:%.*]] +// SIMD-ONLY0: if.else1039: +// SIMD-ONLY0-NEXT: [[TMP647:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP647]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1040]] +// SIMD-ONLY0: if.end1040: +// SIMD-ONLY0-NEXT: [[TMP648:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1041:%.*]] = zext i8 [[TMP648]] to i32 +// SIMD-ONLY0-NEXT: [[TMP649:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1042:%.*]] = zext i8 [[TMP649]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1043:%.*]] = icmp eq i32 [[CONV1041]], [[CONV1042]] +// SIMD-ONLY0-NEXT: [[CONV1044:%.*]] = zext i1 [[CMP1043]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1045:%.*]] = trunc i32 [[CONV1044]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1045]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP650:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1046:%.*]] = icmp ne i8 [[TMP650]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1046]], label [[IF_THEN1047:%.*]], label [[IF_END1048:%.*]] +// SIMD-ONLY0: if.then1047: +// SIMD-ONLY0-NEXT: [[TMP651:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP651]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1048]] +// SIMD-ONLY0: if.end1048: +// SIMD-ONLY0-NEXT: [[TMP652:%.*]] = load i8, ptr [[UCE]], align 
1 +// SIMD-ONLY0-NEXT: [[CONV1049:%.*]] = zext i8 [[TMP652]] to i32 +// SIMD-ONLY0-NEXT: [[TMP653:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1050:%.*]] = zext i8 [[TMP653]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1051:%.*]] = icmp eq i32 [[CONV1049]], [[CONV1050]] +// SIMD-ONLY0-NEXT: [[CONV1052:%.*]] = zext i1 [[CMP1051]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1053:%.*]] = trunc i32 [[CONV1052]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1053]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP654:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1054:%.*]] = icmp ne i8 [[TMP654]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1054]], label [[IF_THEN1055:%.*]], label [[IF_END1056:%.*]] +// SIMD-ONLY0: if.then1055: +// SIMD-ONLY0-NEXT: [[TMP655:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP655]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1056]] +// SIMD-ONLY0: if.end1056: +// SIMD-ONLY0-NEXT: [[TMP656:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1057:%.*]] = zext i8 [[TMP656]] to i32 +// SIMD-ONLY0-NEXT: [[TMP657:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1058:%.*]] = zext i8 [[TMP657]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1059:%.*]] = icmp eq i32 [[CONV1057]], [[CONV1058]] +// SIMD-ONLY0-NEXT: [[CONV1060:%.*]] = zext i1 [[CMP1059]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1061:%.*]] = trunc i32 [[CONV1060]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1061]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP658:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1062:%.*]] = icmp ne i8 [[TMP658]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1062]], label [[IF_THEN1063:%.*]], label [[IF_ELSE1064:%.*]] +// SIMD-ONLY0: if.then1063: +// SIMD-ONLY0-NEXT: [[TMP659:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP659]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1065:%.*]] +// SIMD-ONLY0: if.else1064: +// SIMD-ONLY0-NEXT: [[TMP660:%.*]] = 
load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP660]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1065]] +// SIMD-ONLY0: if.end1065: +// SIMD-ONLY0-NEXT: [[TMP661:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1066:%.*]] = zext i8 [[TMP661]] to i32 +// SIMD-ONLY0-NEXT: [[TMP662:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1067:%.*]] = zext i8 [[TMP662]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1068:%.*]] = icmp eq i32 [[CONV1066]], [[CONV1067]] +// SIMD-ONLY0-NEXT: [[CONV1069:%.*]] = zext i1 [[CMP1068]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1070:%.*]] = trunc i32 [[CONV1069]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1070]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP663:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1071:%.*]] = icmp ne i8 [[TMP663]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1071]], label [[IF_THEN1072:%.*]], label [[IF_ELSE1073:%.*]] +// SIMD-ONLY0: if.then1072: +// SIMD-ONLY0-NEXT: [[TMP664:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP664]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1074:%.*]] +// SIMD-ONLY0: if.else1073: +// SIMD-ONLY0-NEXT: [[TMP665:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP665]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1074]] +// SIMD-ONLY0: if.end1074: +// SIMD-ONLY0-NEXT: [[TMP666:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP666]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP667:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1075:%.*]] = zext i8 [[TMP667]] to i32 +// SIMD-ONLY0-NEXT: [[TMP668:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1076:%.*]] = zext i8 [[TMP668]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1077:%.*]] = icmp sgt i32 [[CONV1075]], [[CONV1076]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1077]], label [[IF_THEN1079:%.*]], label [[IF_END1080:%.*]] +// SIMD-ONLY0: if.then1079: +// 
SIMD-ONLY0-NEXT: [[TMP669:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP669]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1080]] +// SIMD-ONLY0: if.end1080: +// SIMD-ONLY0-NEXT: [[TMP670:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP670]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP671:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1081:%.*]] = zext i8 [[TMP671]] to i32 +// SIMD-ONLY0-NEXT: [[TMP672:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1082:%.*]] = zext i8 [[TMP672]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1083:%.*]] = icmp sgt i32 [[CONV1081]], [[CONV1082]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1083]], label [[IF_THEN1085:%.*]], label [[IF_END1086:%.*]] +// SIMD-ONLY0: if.then1085: +// SIMD-ONLY0-NEXT: [[TMP673:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP673]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1086]] +// SIMD-ONLY0: if.end1086: +// SIMD-ONLY0-NEXT: [[TMP674:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP674]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP675:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1087:%.*]] = zext i8 [[TMP675]] to i32 +// SIMD-ONLY0-NEXT: [[TMP676:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1088:%.*]] = zext i8 [[TMP676]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1089:%.*]] = icmp slt i32 [[CONV1087]], [[CONV1088]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1089]], label [[IF_THEN1091:%.*]], label [[IF_END1092:%.*]] +// SIMD-ONLY0: if.then1091: +// SIMD-ONLY0-NEXT: [[TMP677:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP677]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1092]] +// SIMD-ONLY0: if.end1092: +// SIMD-ONLY0-NEXT: [[TMP678:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP678]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP679:%.*]] = load i8, ptr [[UCX]], 
align 1 +// SIMD-ONLY0-NEXT: [[CONV1093:%.*]] = zext i8 [[TMP679]] to i32 +// SIMD-ONLY0-NEXT: [[TMP680:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1094:%.*]] = zext i8 [[TMP680]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1095:%.*]] = icmp slt i32 [[CONV1093]], [[CONV1094]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1095]], label [[IF_THEN1097:%.*]], label [[IF_END1098:%.*]] +// SIMD-ONLY0: if.then1097: +// SIMD-ONLY0-NEXT: [[TMP681:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP681]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1098]] +// SIMD-ONLY0: if.end1098: +// SIMD-ONLY0-NEXT: [[TMP682:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP682]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP683:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1099:%.*]] = zext i8 [[TMP683]] to i32 +// SIMD-ONLY0-NEXT: [[TMP684:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1100:%.*]] = zext i8 [[TMP684]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1101:%.*]] = icmp eq i32 [[CONV1099]], [[CONV1100]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1101]], label [[IF_THEN1103:%.*]], label [[IF_END1104:%.*]] +// SIMD-ONLY0: if.then1103: +// SIMD-ONLY0-NEXT: [[TMP685:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP685]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1104]] +// SIMD-ONLY0: if.end1104: +// SIMD-ONLY0-NEXT: [[TMP686:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP686]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP687:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1105:%.*]] = zext i8 [[TMP687]] to i32 +// SIMD-ONLY0-NEXT: [[TMP688:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1106:%.*]] = zext i8 [[TMP688]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1107:%.*]] = icmp eq i32 [[CONV1105]], [[CONV1106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1107]], label [[IF_THEN1109:%.*]], label [[IF_END1110:%.*]] +// SIMD-ONLY0: 
if.then1109: +// SIMD-ONLY0-NEXT: [[TMP689:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP689]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1110]] +// SIMD-ONLY0: if.end1110: +// SIMD-ONLY0-NEXT: [[TMP690:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1111:%.*]] = zext i8 [[TMP690]] to i32 +// SIMD-ONLY0-NEXT: [[TMP691:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1112:%.*]] = zext i8 [[TMP691]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1113:%.*]] = icmp sgt i32 [[CONV1111]], [[CONV1112]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1113]], label [[IF_THEN1115:%.*]], label [[IF_END1116:%.*]] +// SIMD-ONLY0: if.then1115: +// SIMD-ONLY0-NEXT: [[TMP692:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP692]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1116]] +// SIMD-ONLY0: if.end1116: +// SIMD-ONLY0-NEXT: [[TMP693:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP693]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP694:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1117:%.*]] = zext i8 [[TMP694]] to i32 +// SIMD-ONLY0-NEXT: [[TMP695:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1118:%.*]] = zext i8 [[TMP695]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1119:%.*]] = icmp sgt i32 [[CONV1117]], [[CONV1118]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1119]], label [[IF_THEN1121:%.*]], label [[IF_END1122:%.*]] +// SIMD-ONLY0: if.then1121: +// SIMD-ONLY0-NEXT: [[TMP696:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP696]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1122]] +// SIMD-ONLY0: if.end1122: +// SIMD-ONLY0-NEXT: [[TMP697:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP697]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP698:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1123:%.*]] = zext i8 [[TMP698]] to i32 +// SIMD-ONLY0-NEXT: [[TMP699:%.*]] = load 
i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1124:%.*]] = zext i8 [[TMP699]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1125:%.*]] = icmp slt i32 [[CONV1123]], [[CONV1124]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1125]], label [[IF_THEN1127:%.*]], label [[IF_END1128:%.*]] +// SIMD-ONLY0: if.then1127: +// SIMD-ONLY0-NEXT: [[TMP700:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP700]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1128]] +// SIMD-ONLY0: if.end1128: +// SIMD-ONLY0-NEXT: [[TMP701:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP701]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP702:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1129:%.*]] = zext i8 [[TMP702]] to i32 +// SIMD-ONLY0-NEXT: [[TMP703:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1130:%.*]] = zext i8 [[TMP703]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1131:%.*]] = icmp slt i32 [[CONV1129]], [[CONV1130]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1131]], label [[IF_THEN1133:%.*]], label [[IF_END1134:%.*]] +// SIMD-ONLY0: if.then1133: +// SIMD-ONLY0-NEXT: [[TMP704:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP704]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1134]] +// SIMD-ONLY0: if.end1134: +// SIMD-ONLY0-NEXT: [[TMP705:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP705]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP706:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1135:%.*]] = zext i8 [[TMP706]] to i32 +// SIMD-ONLY0-NEXT: [[TMP707:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1136:%.*]] = zext i8 [[TMP707]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1137:%.*]] = icmp eq i32 [[CONV1135]], [[CONV1136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1137]], label [[IF_THEN1139:%.*]], label [[IF_END1140:%.*]] +// SIMD-ONLY0: if.then1139: +// SIMD-ONLY0-NEXT: [[TMP708:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 
[[TMP708]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1140]] +// SIMD-ONLY0: if.end1140: +// SIMD-ONLY0-NEXT: [[TMP709:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP709]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP710:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1141:%.*]] = zext i8 [[TMP710]] to i32 +// SIMD-ONLY0-NEXT: [[TMP711:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1142:%.*]] = zext i8 [[TMP711]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1143:%.*]] = icmp eq i32 [[CONV1141]], [[CONV1142]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1143]], label [[IF_THEN1145:%.*]], label [[IF_END1146:%.*]] +// SIMD-ONLY0: if.then1145: +// SIMD-ONLY0-NEXT: [[TMP712:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP712]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1146]] +// SIMD-ONLY0: if.end1146: +// SIMD-ONLY0-NEXT: [[TMP713:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP713]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP714:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1147:%.*]] = zext i8 [[TMP714]] to i32 +// SIMD-ONLY0-NEXT: [[TMP715:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1148:%.*]] = zext i8 [[TMP715]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1149:%.*]] = icmp eq i32 [[CONV1147]], [[CONV1148]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1149]], label [[IF_THEN1151:%.*]], label [[IF_ELSE1152:%.*]] +// SIMD-ONLY0: if.then1151: +// SIMD-ONLY0-NEXT: [[TMP716:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP716]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1153:%.*]] +// SIMD-ONLY0: if.else1152: +// SIMD-ONLY0-NEXT: [[TMP717:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP717]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1153]] +// SIMD-ONLY0: if.end1153: +// SIMD-ONLY0-NEXT: [[TMP718:%.*]] = load i8, ptr [[UCE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV1154:%.*]] = zext i8 [[TMP718]] to i32 +// SIMD-ONLY0-NEXT: [[TMP719:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1155:%.*]] = zext i8 [[TMP719]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1156:%.*]] = icmp eq i32 [[CONV1154]], [[CONV1155]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1156]], label [[IF_THEN1158:%.*]], label [[IF_ELSE1159:%.*]] +// SIMD-ONLY0: if.then1158: +// SIMD-ONLY0-NEXT: [[TMP720:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP720]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1160:%.*]] +// SIMD-ONLY0: if.else1159: +// SIMD-ONLY0-NEXT: [[TMP721:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP721]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1160]] +// SIMD-ONLY0: if.end1160: +// SIMD-ONLY0-NEXT: [[TMP722:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1161:%.*]] = zext i8 [[TMP722]] to i32 +// SIMD-ONLY0-NEXT: [[TMP723:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1162:%.*]] = zext i8 [[TMP723]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1163:%.*]] = icmp eq i32 [[CONV1161]], [[CONV1162]] +// SIMD-ONLY0-NEXT: [[CONV1164:%.*]] = zext i1 [[CMP1163]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1165:%.*]] = trunc i32 [[CONV1164]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1165]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP724:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1166:%.*]] = icmp ne i8 [[TMP724]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1166]], label [[IF_THEN1167:%.*]], label [[IF_END1168:%.*]] +// SIMD-ONLY0: if.then1167: +// SIMD-ONLY0-NEXT: [[TMP725:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP725]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1168]] +// SIMD-ONLY0: if.end1168: +// SIMD-ONLY0-NEXT: [[TMP726:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1169:%.*]] = zext i8 [[TMP726]] to i32 +// SIMD-ONLY0-NEXT: [[TMP727:%.*]] = load i8, 
ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1170:%.*]] = zext i8 [[TMP727]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1171:%.*]] = icmp eq i32 [[CONV1169]], [[CONV1170]] +// SIMD-ONLY0-NEXT: [[CONV1172:%.*]] = zext i1 [[CMP1171]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1173:%.*]] = trunc i32 [[CONV1172]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1173]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP728:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1174:%.*]] = icmp ne i8 [[TMP728]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1174]], label [[IF_THEN1175:%.*]], label [[IF_END1176:%.*]] +// SIMD-ONLY0: if.then1175: +// SIMD-ONLY0-NEXT: [[TMP729:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP729]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1176]] +// SIMD-ONLY0: if.end1176: +// SIMD-ONLY0-NEXT: [[TMP730:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1177:%.*]] = zext i8 [[TMP730]] to i32 +// SIMD-ONLY0-NEXT: [[TMP731:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1178:%.*]] = zext i8 [[TMP731]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1179:%.*]] = icmp eq i32 [[CONV1177]], [[CONV1178]] +// SIMD-ONLY0-NEXT: [[CONV1180:%.*]] = zext i1 [[CMP1179]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1181:%.*]] = trunc i32 [[CONV1180]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1181]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP732:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1182:%.*]] = icmp ne i8 [[TMP732]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1182]], label [[IF_THEN1183:%.*]], label [[IF_ELSE1184:%.*]] +// SIMD-ONLY0: if.then1183: +// SIMD-ONLY0-NEXT: [[TMP733:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP733]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1185:%.*]] +// SIMD-ONLY0: if.else1184: +// SIMD-ONLY0-NEXT: [[TMP734:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP734]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br 
label [[IF_END1185]] +// SIMD-ONLY0: if.end1185: +// SIMD-ONLY0-NEXT: [[TMP735:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1186:%.*]] = zext i8 [[TMP735]] to i32 +// SIMD-ONLY0-NEXT: [[TMP736:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1187:%.*]] = zext i8 [[TMP736]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1188:%.*]] = icmp eq i32 [[CONV1186]], [[CONV1187]] +// SIMD-ONLY0-NEXT: [[CONV1189:%.*]] = zext i1 [[CMP1188]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1190:%.*]] = trunc i32 [[CONV1189]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1190]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP737:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1191:%.*]] = icmp ne i8 [[TMP737]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1191]], label [[IF_THEN1192:%.*]], label [[IF_ELSE1193:%.*]] +// SIMD-ONLY0: if.then1192: +// SIMD-ONLY0-NEXT: [[TMP738:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP738]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1194:%.*]] +// SIMD-ONLY0: if.else1193: +// SIMD-ONLY0-NEXT: [[TMP739:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP739]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1194]] +// SIMD-ONLY0: if.end1194: +// SIMD-ONLY0-NEXT: [[TMP740:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP740]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP741:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1195:%.*]] = zext i8 [[TMP741]] to i32 +// SIMD-ONLY0-NEXT: [[TMP742:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1196:%.*]] = zext i8 [[TMP742]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1197:%.*]] = icmp sgt i32 [[CONV1195]], [[CONV1196]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1197]], label [[IF_THEN1199:%.*]], label [[IF_END1200:%.*]] +// SIMD-ONLY0: if.then1199: +// SIMD-ONLY0-NEXT: [[TMP743:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP743]], ptr [[UCX]], align 1 
+// SIMD-ONLY0-NEXT: br label [[IF_END1200]] +// SIMD-ONLY0: if.end1200: +// SIMD-ONLY0-NEXT: [[TMP744:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP744]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP745:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1201:%.*]] = zext i8 [[TMP745]] to i32 +// SIMD-ONLY0-NEXT: [[TMP746:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1202:%.*]] = zext i8 [[TMP746]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1203:%.*]] = icmp sgt i32 [[CONV1201]], [[CONV1202]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1203]], label [[IF_THEN1205:%.*]], label [[IF_END1206:%.*]] +// SIMD-ONLY0: if.then1205: +// SIMD-ONLY0-NEXT: [[TMP747:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP747]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1206]] +// SIMD-ONLY0: if.end1206: +// SIMD-ONLY0-NEXT: [[TMP748:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP748]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP749:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1207:%.*]] = zext i8 [[TMP749]] to i32 +// SIMD-ONLY0-NEXT: [[TMP750:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1208:%.*]] = zext i8 [[TMP750]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1209:%.*]] = icmp slt i32 [[CONV1207]], [[CONV1208]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1209]], label [[IF_THEN1211:%.*]], label [[IF_END1212:%.*]] +// SIMD-ONLY0: if.then1211: +// SIMD-ONLY0-NEXT: [[TMP751:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP751]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1212]] +// SIMD-ONLY0: if.end1212: +// SIMD-ONLY0-NEXT: [[TMP752:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP752]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP753:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1213:%.*]] = zext i8 [[TMP753]] to i32 +// SIMD-ONLY0-NEXT: [[TMP754:%.*]] = load i8, ptr 
[[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1214:%.*]] = zext i8 [[TMP754]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1215:%.*]] = icmp slt i32 [[CONV1213]], [[CONV1214]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1215]], label [[IF_THEN1217:%.*]], label [[IF_END1218:%.*]] +// SIMD-ONLY0: if.then1217: +// SIMD-ONLY0-NEXT: [[TMP755:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP755]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1218]] +// SIMD-ONLY0: if.end1218: +// SIMD-ONLY0-NEXT: [[TMP756:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP756]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP757:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1219:%.*]] = zext i8 [[TMP757]] to i32 +// SIMD-ONLY0-NEXT: [[TMP758:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1220:%.*]] = zext i8 [[TMP758]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1221:%.*]] = icmp eq i32 [[CONV1219]], [[CONV1220]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1221]], label [[IF_THEN1223:%.*]], label [[IF_END1224:%.*]] +// SIMD-ONLY0: if.then1223: +// SIMD-ONLY0-NEXT: [[TMP759:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP759]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1224]] +// SIMD-ONLY0: if.end1224: +// SIMD-ONLY0-NEXT: [[TMP760:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP760]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP761:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1225:%.*]] = zext i8 [[TMP761]] to i32 +// SIMD-ONLY0-NEXT: [[TMP762:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1226:%.*]] = zext i8 [[TMP762]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1227:%.*]] = icmp eq i32 [[CONV1225]], [[CONV1226]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1227]], label [[IF_THEN1229:%.*]], label [[IF_END1230:%.*]] +// SIMD-ONLY0: if.then1229: +// SIMD-ONLY0-NEXT: [[TMP763:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP763]], 
ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1230]] +// SIMD-ONLY0: if.end1230: +// SIMD-ONLY0-NEXT: [[TMP764:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1231:%.*]] = zext i8 [[TMP764]] to i32 +// SIMD-ONLY0-NEXT: [[TMP765:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1232:%.*]] = zext i8 [[TMP765]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1233:%.*]] = icmp sgt i32 [[CONV1231]], [[CONV1232]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1233]], label [[IF_THEN1235:%.*]], label [[IF_END1236:%.*]] +// SIMD-ONLY0: if.then1235: +// SIMD-ONLY0-NEXT: [[TMP766:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP766]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1236]] +// SIMD-ONLY0: if.end1236: +// SIMD-ONLY0-NEXT: [[TMP767:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP767]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP768:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1237:%.*]] = zext i8 [[TMP768]] to i32 +// SIMD-ONLY0-NEXT: [[TMP769:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1238:%.*]] = zext i8 [[TMP769]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1239:%.*]] = icmp sgt i32 [[CONV1237]], [[CONV1238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1239]], label [[IF_THEN1241:%.*]], label [[IF_END1242:%.*]] +// SIMD-ONLY0: if.then1241: +// SIMD-ONLY0-NEXT: [[TMP770:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP770]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1242]] +// SIMD-ONLY0: if.end1242: +// SIMD-ONLY0-NEXT: [[TMP771:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP771]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP772:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1243:%.*]] = zext i8 [[TMP772]] to i32 +// SIMD-ONLY0-NEXT: [[TMP773:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1244:%.*]] = zext i8 [[TMP773]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1245:%.*]] = icmp slt i32 [[CONV1243]], [[CONV1244]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1245]], label [[IF_THEN1247:%.*]], label [[IF_END1248:%.*]] +// SIMD-ONLY0: if.then1247: +// SIMD-ONLY0-NEXT: [[TMP774:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP774]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1248]] +// SIMD-ONLY0: if.end1248: +// SIMD-ONLY0-NEXT: [[TMP775:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP775]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP776:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1249:%.*]] = zext i8 [[TMP776]] to i32 +// SIMD-ONLY0-NEXT: [[TMP777:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1250:%.*]] = zext i8 [[TMP777]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1251:%.*]] = icmp slt i32 [[CONV1249]], [[CONV1250]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1251]], label [[IF_THEN1253:%.*]], label [[IF_END1254:%.*]] +// SIMD-ONLY0: if.then1253: +// SIMD-ONLY0-NEXT: [[TMP778:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP778]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1254]] +// SIMD-ONLY0: if.end1254: +// SIMD-ONLY0-NEXT: [[TMP779:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP779]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP780:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1255:%.*]] = zext i8 [[TMP780]] to i32 +// SIMD-ONLY0-NEXT: [[TMP781:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1256:%.*]] = zext i8 [[TMP781]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1257:%.*]] = icmp eq i32 [[CONV1255]], [[CONV1256]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1257]], label [[IF_THEN1259:%.*]], label [[IF_END1260:%.*]] +// SIMD-ONLY0: if.then1259: +// SIMD-ONLY0-NEXT: [[TMP782:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP782]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1260]] +// SIMD-ONLY0: if.end1260: +// 
SIMD-ONLY0-NEXT: [[TMP783:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP783]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP784:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1261:%.*]] = zext i8 [[TMP784]] to i32 +// SIMD-ONLY0-NEXT: [[TMP785:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1262:%.*]] = zext i8 [[TMP785]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1263:%.*]] = icmp eq i32 [[CONV1261]], [[CONV1262]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1263]], label [[IF_THEN1265:%.*]], label [[IF_END1266:%.*]] +// SIMD-ONLY0: if.then1265: +// SIMD-ONLY0-NEXT: [[TMP786:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP786]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1266]] +// SIMD-ONLY0: if.end1266: +// SIMD-ONLY0-NEXT: [[TMP787:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP787]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP788:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1267:%.*]] = zext i8 [[TMP788]] to i32 +// SIMD-ONLY0-NEXT: [[TMP789:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1268:%.*]] = zext i8 [[TMP789]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1269:%.*]] = icmp eq i32 [[CONV1267]], [[CONV1268]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1269]], label [[IF_THEN1271:%.*]], label [[IF_ELSE1272:%.*]] +// SIMD-ONLY0: if.then1271: +// SIMD-ONLY0-NEXT: [[TMP790:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP790]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1273:%.*]] +// SIMD-ONLY0: if.else1272: +// SIMD-ONLY0-NEXT: [[TMP791:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP791]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1273]] +// SIMD-ONLY0: if.end1273: +// SIMD-ONLY0-NEXT: [[TMP792:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1274:%.*]] = zext i8 [[TMP792]] to i32 +// SIMD-ONLY0-NEXT: [[TMP793:%.*]] = load i8, ptr 
[[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1275:%.*]] = zext i8 [[TMP793]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1276:%.*]] = icmp eq i32 [[CONV1274]], [[CONV1275]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1276]], label [[IF_THEN1278:%.*]], label [[IF_ELSE1279:%.*]] +// SIMD-ONLY0: if.then1278: +// SIMD-ONLY0-NEXT: [[TMP794:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP794]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1280:%.*]] +// SIMD-ONLY0: if.else1279: +// SIMD-ONLY0-NEXT: [[TMP795:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP795]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1280]] +// SIMD-ONLY0: if.end1280: +// SIMD-ONLY0-NEXT: [[TMP796:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1281:%.*]] = zext i8 [[TMP796]] to i32 +// SIMD-ONLY0-NEXT: [[TMP797:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1282:%.*]] = zext i8 [[TMP797]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1283:%.*]] = icmp eq i32 [[CONV1281]], [[CONV1282]] +// SIMD-ONLY0-NEXT: [[CONV1284:%.*]] = zext i1 [[CMP1283]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1285:%.*]] = trunc i32 [[CONV1284]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1285]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP798:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1286:%.*]] = icmp ne i8 [[TMP798]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1286]], label [[IF_THEN1287:%.*]], label [[IF_END1288:%.*]] +// SIMD-ONLY0: if.then1287: +// SIMD-ONLY0-NEXT: [[TMP799:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP799]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1288]] +// SIMD-ONLY0: if.end1288: +// SIMD-ONLY0-NEXT: [[TMP800:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1289:%.*]] = zext i8 [[TMP800]] to i32 +// SIMD-ONLY0-NEXT: [[TMP801:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1290:%.*]] = zext i8 [[TMP801]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1291:%.*]] = icmp eq i32 [[CONV1289]], [[CONV1290]] +// SIMD-ONLY0-NEXT: [[CONV1292:%.*]] = zext i1 [[CMP1291]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1293:%.*]] = trunc i32 [[CONV1292]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1293]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP802:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1294:%.*]] = icmp ne i8 [[TMP802]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1294]], label [[IF_THEN1295:%.*]], label [[IF_END1296:%.*]] +// SIMD-ONLY0: if.then1295: +// SIMD-ONLY0-NEXT: [[TMP803:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP803]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1296]] +// SIMD-ONLY0: if.end1296: +// SIMD-ONLY0-NEXT: [[TMP804:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1297:%.*]] = zext i8 [[TMP804]] to i32 +// SIMD-ONLY0-NEXT: [[TMP805:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1298:%.*]] = zext i8 [[TMP805]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1299:%.*]] = icmp eq i32 [[CONV1297]], [[CONV1298]] +// SIMD-ONLY0-NEXT: [[CONV1300:%.*]] = zext i1 [[CMP1299]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1301:%.*]] = trunc i32 [[CONV1300]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1301]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP806:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1302:%.*]] = icmp ne i8 [[TMP806]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1302]], label [[IF_THEN1303:%.*]], label [[IF_ELSE1304:%.*]] +// SIMD-ONLY0: if.then1303: +// SIMD-ONLY0-NEXT: [[TMP807:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP807]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1305:%.*]] +// SIMD-ONLY0: if.else1304: +// SIMD-ONLY0-NEXT: [[TMP808:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP808]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1305]] +// SIMD-ONLY0: if.end1305: +// SIMD-ONLY0-NEXT: [[TMP809:%.*]] = load i8, ptr 
[[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1306:%.*]] = zext i8 [[TMP809]] to i32 +// SIMD-ONLY0-NEXT: [[TMP810:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1307:%.*]] = zext i8 [[TMP810]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1308:%.*]] = icmp eq i32 [[CONV1306]], [[CONV1307]] +// SIMD-ONLY0-NEXT: [[CONV1309:%.*]] = zext i1 [[CMP1308]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1310:%.*]] = trunc i32 [[CONV1309]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1310]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP811:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1311:%.*]] = icmp ne i8 [[TMP811]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1311]], label [[IF_THEN1312:%.*]], label [[IF_ELSE1313:%.*]] +// SIMD-ONLY0: if.then1312: +// SIMD-ONLY0-NEXT: [[TMP812:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP812]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1314:%.*]] +// SIMD-ONLY0: if.else1313: +// SIMD-ONLY0-NEXT: [[TMP813:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP813]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1314]] +// SIMD-ONLY0: if.end1314: +// SIMD-ONLY0-NEXT: [[TMP814:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP814]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP815:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1315:%.*]] = zext i8 [[TMP815]] to i32 +// SIMD-ONLY0-NEXT: [[TMP816:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1316:%.*]] = zext i8 [[TMP816]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1317:%.*]] = icmp sgt i32 [[CONV1315]], [[CONV1316]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1317]], label [[IF_THEN1319:%.*]], label [[IF_END1320:%.*]] +// SIMD-ONLY0: if.then1319: +// SIMD-ONLY0-NEXT: [[TMP817:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP817]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1320]] +// SIMD-ONLY0: if.end1320: +// SIMD-ONLY0-NEXT: 
[[TMP818:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP818]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP819:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1321:%.*]] = zext i8 [[TMP819]] to i32 +// SIMD-ONLY0-NEXT: [[TMP820:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1322:%.*]] = zext i8 [[TMP820]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1323:%.*]] = icmp sgt i32 [[CONV1321]], [[CONV1322]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1323]], label [[IF_THEN1325:%.*]], label [[IF_END1326:%.*]] +// SIMD-ONLY0: if.then1325: +// SIMD-ONLY0-NEXT: [[TMP821:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP821]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1326]] +// SIMD-ONLY0: if.end1326: +// SIMD-ONLY0-NEXT: [[TMP822:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP822]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP823:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1327:%.*]] = zext i8 [[TMP823]] to i32 +// SIMD-ONLY0-NEXT: [[TMP824:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1328:%.*]] = zext i8 [[TMP824]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1329:%.*]] = icmp slt i32 [[CONV1327]], [[CONV1328]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1329]], label [[IF_THEN1331:%.*]], label [[IF_END1332:%.*]] +// SIMD-ONLY0: if.then1331: +// SIMD-ONLY0-NEXT: [[TMP825:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP825]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1332]] +// SIMD-ONLY0: if.end1332: +// SIMD-ONLY0-NEXT: [[TMP826:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP826]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP827:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1333:%.*]] = zext i8 [[TMP827]] to i32 +// SIMD-ONLY0-NEXT: [[TMP828:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1334:%.*]] = zext i8 [[TMP828]] to i32 +// 
SIMD-ONLY0-NEXT: [[CMP1335:%.*]] = icmp slt i32 [[CONV1333]], [[CONV1334]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1335]], label [[IF_THEN1337:%.*]], label [[IF_END1338:%.*]] +// SIMD-ONLY0: if.then1337: +// SIMD-ONLY0-NEXT: [[TMP829:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP829]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1338]] +// SIMD-ONLY0: if.end1338: +// SIMD-ONLY0-NEXT: [[TMP830:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP830]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP831:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1339:%.*]] = zext i8 [[TMP831]] to i32 +// SIMD-ONLY0-NEXT: [[TMP832:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1340:%.*]] = zext i8 [[TMP832]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1341:%.*]] = icmp eq i32 [[CONV1339]], [[CONV1340]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1341]], label [[IF_THEN1343:%.*]], label [[IF_END1344:%.*]] +// SIMD-ONLY0: if.then1343: +// SIMD-ONLY0-NEXT: [[TMP833:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP833]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1344]] +// SIMD-ONLY0: if.end1344: +// SIMD-ONLY0-NEXT: [[TMP834:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP834]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP835:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1345:%.*]] = zext i8 [[TMP835]] to i32 +// SIMD-ONLY0-NEXT: [[TMP836:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1346:%.*]] = zext i8 [[TMP836]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1347:%.*]] = icmp eq i32 [[CONV1345]], [[CONV1346]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1347]], label [[IF_THEN1349:%.*]], label [[IF_END1350:%.*]] +// SIMD-ONLY0: if.then1349: +// SIMD-ONLY0-NEXT: [[TMP837:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP837]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1350]] +// SIMD-ONLY0: 
if.end1350: +// SIMD-ONLY0-NEXT: [[TMP838:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1351:%.*]] = zext i8 [[TMP838]] to i32 +// SIMD-ONLY0-NEXT: [[TMP839:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1352:%.*]] = zext i8 [[TMP839]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1353:%.*]] = icmp sgt i32 [[CONV1351]], [[CONV1352]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1353]], label [[IF_THEN1355:%.*]], label [[IF_END1356:%.*]] +// SIMD-ONLY0: if.then1355: +// SIMD-ONLY0-NEXT: [[TMP840:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP840]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1356]] +// SIMD-ONLY0: if.end1356: +// SIMD-ONLY0-NEXT: [[TMP841:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP841]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP842:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1357:%.*]] = zext i8 [[TMP842]] to i32 +// SIMD-ONLY0-NEXT: [[TMP843:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1358:%.*]] = zext i8 [[TMP843]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1359:%.*]] = icmp sgt i32 [[CONV1357]], [[CONV1358]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1359]], label [[IF_THEN1361:%.*]], label [[IF_END1362:%.*]] +// SIMD-ONLY0: if.then1361: +// SIMD-ONLY0-NEXT: [[TMP844:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP844]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1362]] +// SIMD-ONLY0: if.end1362: +// SIMD-ONLY0-NEXT: [[TMP845:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP845]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP846:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1363:%.*]] = zext i8 [[TMP846]] to i32 +// SIMD-ONLY0-NEXT: [[TMP847:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1364:%.*]] = zext i8 [[TMP847]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1365:%.*]] = icmp slt i32 [[CONV1363]], [[CONV1364]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP1365]], label [[IF_THEN1367:%.*]], label [[IF_END1368:%.*]] +// SIMD-ONLY0: if.then1367: +// SIMD-ONLY0-NEXT: [[TMP848:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP848]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1368]] +// SIMD-ONLY0: if.end1368: +// SIMD-ONLY0-NEXT: [[TMP849:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP849]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP850:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1369:%.*]] = zext i8 [[TMP850]] to i32 +// SIMD-ONLY0-NEXT: [[TMP851:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1370:%.*]] = zext i8 [[TMP851]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1371:%.*]] = icmp slt i32 [[CONV1369]], [[CONV1370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1371]], label [[IF_THEN1373:%.*]], label [[IF_END1374:%.*]] +// SIMD-ONLY0: if.then1373: +// SIMD-ONLY0-NEXT: [[TMP852:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP852]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1374]] +// SIMD-ONLY0: if.end1374: +// SIMD-ONLY0-NEXT: [[TMP853:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP853]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP854:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1375:%.*]] = zext i8 [[TMP854]] to i32 +// SIMD-ONLY0-NEXT: [[TMP855:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1376:%.*]] = zext i8 [[TMP855]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1377:%.*]] = icmp eq i32 [[CONV1375]], [[CONV1376]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1377]], label [[IF_THEN1379:%.*]], label [[IF_END1380:%.*]] +// SIMD-ONLY0: if.then1379: +// SIMD-ONLY0-NEXT: [[TMP856:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP856]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1380]] +// SIMD-ONLY0: if.end1380: +// SIMD-ONLY0-NEXT: [[TMP857:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store 
i8 [[TMP857]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP858:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1381:%.*]] = zext i8 [[TMP858]] to i32 +// SIMD-ONLY0-NEXT: [[TMP859:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1382:%.*]] = zext i8 [[TMP859]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1383:%.*]] = icmp eq i32 [[CONV1381]], [[CONV1382]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1383]], label [[IF_THEN1385:%.*]], label [[IF_END1386:%.*]] +// SIMD-ONLY0: if.then1385: +// SIMD-ONLY0-NEXT: [[TMP860:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP860]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1386]] +// SIMD-ONLY0: if.end1386: +// SIMD-ONLY0-NEXT: [[TMP861:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP861]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP862:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1387:%.*]] = zext i8 [[TMP862]] to i32 +// SIMD-ONLY0-NEXT: [[TMP863:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1388:%.*]] = zext i8 [[TMP863]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1389:%.*]] = icmp eq i32 [[CONV1387]], [[CONV1388]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1389]], label [[IF_THEN1391:%.*]], label [[IF_ELSE1392:%.*]] +// SIMD-ONLY0: if.then1391: +// SIMD-ONLY0-NEXT: [[TMP864:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP864]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1393:%.*]] +// SIMD-ONLY0: if.else1392: +// SIMD-ONLY0-NEXT: [[TMP865:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP865]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1393]] +// SIMD-ONLY0: if.end1393: +// SIMD-ONLY0-NEXT: [[TMP866:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1394:%.*]] = zext i8 [[TMP866]] to i32 +// SIMD-ONLY0-NEXT: [[TMP867:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1395:%.*]] = zext i8 [[TMP867]] to i32 +// 
SIMD-ONLY0-NEXT: [[CMP1396:%.*]] = icmp eq i32 [[CONV1394]], [[CONV1395]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1396]], label [[IF_THEN1398:%.*]], label [[IF_ELSE1399:%.*]] +// SIMD-ONLY0: if.then1398: +// SIMD-ONLY0-NEXT: [[TMP868:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP868]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1400:%.*]] +// SIMD-ONLY0: if.else1399: +// SIMD-ONLY0-NEXT: [[TMP869:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP869]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1400]] +// SIMD-ONLY0: if.end1400: +// SIMD-ONLY0-NEXT: [[TMP870:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1401:%.*]] = zext i8 [[TMP870]] to i32 +// SIMD-ONLY0-NEXT: [[TMP871:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1402:%.*]] = zext i8 [[TMP871]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1403:%.*]] = icmp eq i32 [[CONV1401]], [[CONV1402]] +// SIMD-ONLY0-NEXT: [[CONV1404:%.*]] = zext i1 [[CMP1403]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1405:%.*]] = trunc i32 [[CONV1404]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1405]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP872:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1406:%.*]] = icmp ne i8 [[TMP872]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1406]], label [[IF_THEN1407:%.*]], label [[IF_END1408:%.*]] +// SIMD-ONLY0: if.then1407: +// SIMD-ONLY0-NEXT: [[TMP873:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP873]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1408]] +// SIMD-ONLY0: if.end1408: +// SIMD-ONLY0-NEXT: [[TMP874:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1409:%.*]] = zext i8 [[TMP874]] to i32 +// SIMD-ONLY0-NEXT: [[TMP875:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1410:%.*]] = zext i8 [[TMP875]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1411:%.*]] = icmp eq i32 [[CONV1409]], [[CONV1410]] +// SIMD-ONLY0-NEXT: 
[[CONV1412:%.*]] = zext i1 [[CMP1411]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1413:%.*]] = trunc i32 [[CONV1412]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1413]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP876:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1414:%.*]] = icmp ne i8 [[TMP876]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1414]], label [[IF_THEN1415:%.*]], label [[IF_END1416:%.*]] +// SIMD-ONLY0: if.then1415: +// SIMD-ONLY0-NEXT: [[TMP877:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP877]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1416]] +// SIMD-ONLY0: if.end1416: +// SIMD-ONLY0-NEXT: [[TMP878:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1417:%.*]] = zext i8 [[TMP878]] to i32 +// SIMD-ONLY0-NEXT: [[TMP879:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1418:%.*]] = zext i8 [[TMP879]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1419:%.*]] = icmp eq i32 [[CONV1417]], [[CONV1418]] +// SIMD-ONLY0-NEXT: [[CONV1420:%.*]] = zext i1 [[CMP1419]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1421:%.*]] = trunc i32 [[CONV1420]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1421]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP880:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1422:%.*]] = icmp ne i8 [[TMP880]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1422]], label [[IF_THEN1423:%.*]], label [[IF_ELSE1424:%.*]] +// SIMD-ONLY0: if.then1423: +// SIMD-ONLY0-NEXT: [[TMP881:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP881]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1425:%.*]] +// SIMD-ONLY0: if.else1424: +// SIMD-ONLY0-NEXT: [[TMP882:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP882]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1425]] +// SIMD-ONLY0: if.end1425: +// SIMD-ONLY0-NEXT: [[TMP883:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1426:%.*]] = zext i8 [[TMP883]] to i32 
+// SIMD-ONLY0-NEXT: [[TMP884:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1427:%.*]] = zext i8 [[TMP884]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1428:%.*]] = icmp eq i32 [[CONV1426]], [[CONV1427]] +// SIMD-ONLY0-NEXT: [[CONV1429:%.*]] = zext i1 [[CMP1428]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1430:%.*]] = trunc i32 [[CONV1429]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV1430]], ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TMP885:%.*]] = load i8, ptr [[UCR]], align 1 +// SIMD-ONLY0-NEXT: [[TOBOOL1431:%.*]] = icmp ne i8 [[TMP885]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1431]], label [[IF_THEN1432:%.*]], label [[IF_ELSE1433:%.*]] +// SIMD-ONLY0: if.then1432: +// SIMD-ONLY0-NEXT: [[TMP886:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP886]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1434:%.*]] +// SIMD-ONLY0: if.else1433: +// SIMD-ONLY0-NEXT: [[TMP887:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP887]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: br label [[IF_END1434]] +// SIMD-ONLY0: if.end1434: +// SIMD-ONLY0-NEXT: [[TMP888:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP888]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP889:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1435:%.*]] = sext i16 [[TMP889]] to i32 +// SIMD-ONLY0-NEXT: [[TMP890:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1436:%.*]] = sext i16 [[TMP890]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1437:%.*]] = icmp sgt i32 [[CONV1435]], [[CONV1436]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1437]], label [[IF_THEN1439:%.*]], label [[IF_END1440:%.*]] +// SIMD-ONLY0: if.then1439: +// SIMD-ONLY0-NEXT: [[TMP891:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP891]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1440]] +// SIMD-ONLY0: if.end1440: +// SIMD-ONLY0-NEXT: [[TMP892:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP892]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP893:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1441:%.*]] = sext i16 [[TMP893]] to i32 +// SIMD-ONLY0-NEXT: [[TMP894:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1442:%.*]] = sext i16 [[TMP894]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1443:%.*]] = icmp sgt i32 [[CONV1441]], [[CONV1442]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1443]], label [[IF_THEN1445:%.*]], label [[IF_END1446:%.*]] +// SIMD-ONLY0: if.then1445: +// SIMD-ONLY0-NEXT: [[TMP895:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP895]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1446]] +// SIMD-ONLY0: if.end1446: +// SIMD-ONLY0-NEXT: [[TMP896:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP896]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP897:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1447:%.*]] = sext i16 [[TMP897]] to i32 +// SIMD-ONLY0-NEXT: [[TMP898:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1448:%.*]] = sext i16 [[TMP898]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1449:%.*]] = icmp slt i32 [[CONV1447]], [[CONV1448]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1449]], label [[IF_THEN1451:%.*]], label [[IF_END1452:%.*]] +// SIMD-ONLY0: if.then1451: +// SIMD-ONLY0-NEXT: [[TMP899:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP899]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1452]] +// SIMD-ONLY0: if.end1452: +// SIMD-ONLY0-NEXT: [[TMP900:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP900]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP901:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1453:%.*]] = sext i16 [[TMP901]] to i32 +// SIMD-ONLY0-NEXT: [[TMP902:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1454:%.*]] = sext i16 [[TMP902]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1455:%.*]] = icmp slt i32 [[CONV1453]], [[CONV1454]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP1455]], label [[IF_THEN1457:%.*]], label [[IF_END1458:%.*]] +// SIMD-ONLY0: if.then1457: +// SIMD-ONLY0-NEXT: [[TMP903:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP903]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1458]] +// SIMD-ONLY0: if.end1458: +// SIMD-ONLY0-NEXT: [[TMP904:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP904]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP905:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1459:%.*]] = sext i16 [[TMP905]] to i32 +// SIMD-ONLY0-NEXT: [[TMP906:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1460:%.*]] = sext i16 [[TMP906]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1461:%.*]] = icmp eq i32 [[CONV1459]], [[CONV1460]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1461]], label [[IF_THEN1463:%.*]], label [[IF_END1464:%.*]] +// SIMD-ONLY0: if.then1463: +// SIMD-ONLY0-NEXT: [[TMP907:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP907]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1464]] +// SIMD-ONLY0: if.end1464: +// SIMD-ONLY0-NEXT: [[TMP908:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP908]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP909:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1465:%.*]] = sext i16 [[TMP909]] to i32 +// SIMD-ONLY0-NEXT: [[TMP910:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1466:%.*]] = sext i16 [[TMP910]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1467:%.*]] = icmp eq i32 [[CONV1465]], [[CONV1466]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1467]], label [[IF_THEN1469:%.*]], label [[IF_END1470:%.*]] +// SIMD-ONLY0: if.then1469: +// SIMD-ONLY0-NEXT: [[TMP911:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP911]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1470]] +// SIMD-ONLY0: if.end1470: +// SIMD-ONLY0-NEXT: [[TMP912:%.*]] = load i16, ptr [[SE]], align 2 
+// SIMD-ONLY0-NEXT: [[CONV1471:%.*]] = sext i16 [[TMP912]] to i32 +// SIMD-ONLY0-NEXT: [[TMP913:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1472:%.*]] = sext i16 [[TMP913]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1473:%.*]] = icmp sgt i32 [[CONV1471]], [[CONV1472]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1473]], label [[IF_THEN1475:%.*]], label [[IF_END1476:%.*]] +// SIMD-ONLY0: if.then1475: +// SIMD-ONLY0-NEXT: [[TMP914:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP914]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1476]] +// SIMD-ONLY0: if.end1476: +// SIMD-ONLY0-NEXT: [[TMP915:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP915]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP916:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1477:%.*]] = sext i16 [[TMP916]] to i32 +// SIMD-ONLY0-NEXT: [[TMP917:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1478:%.*]] = sext i16 [[TMP917]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1479:%.*]] = icmp sgt i32 [[CONV1477]], [[CONV1478]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1479]], label [[IF_THEN1481:%.*]], label [[IF_END1482:%.*]] +// SIMD-ONLY0: if.then1481: +// SIMD-ONLY0-NEXT: [[TMP918:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP918]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1482]] +// SIMD-ONLY0: if.end1482: +// SIMD-ONLY0-NEXT: [[TMP919:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP919]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP920:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1483:%.*]] = sext i16 [[TMP920]] to i32 +// SIMD-ONLY0-NEXT: [[TMP921:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1484:%.*]] = sext i16 [[TMP921]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1485:%.*]] = icmp slt i32 [[CONV1483]], [[CONV1484]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1485]], label [[IF_THEN1487:%.*]], label [[IF_END1488:%.*]] +// SIMD-ONLY0: 
if.then1487: +// SIMD-ONLY0-NEXT: [[TMP922:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP922]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1488]] +// SIMD-ONLY0: if.end1488: +// SIMD-ONLY0-NEXT: [[TMP923:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP923]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP924:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1489:%.*]] = sext i16 [[TMP924]] to i32 +// SIMD-ONLY0-NEXT: [[TMP925:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1490:%.*]] = sext i16 [[TMP925]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1491:%.*]] = icmp slt i32 [[CONV1489]], [[CONV1490]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1491]], label [[IF_THEN1493:%.*]], label [[IF_END1494:%.*]] +// SIMD-ONLY0: if.then1493: +// SIMD-ONLY0-NEXT: [[TMP926:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP926]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1494]] +// SIMD-ONLY0: if.end1494: +// SIMD-ONLY0-NEXT: [[TMP927:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP927]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP928:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1495:%.*]] = sext i16 [[TMP928]] to i32 +// SIMD-ONLY0-NEXT: [[TMP929:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1496:%.*]] = sext i16 [[TMP929]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1497:%.*]] = icmp eq i32 [[CONV1495]], [[CONV1496]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1497]], label [[IF_THEN1499:%.*]], label [[IF_END1500:%.*]] +// SIMD-ONLY0: if.then1499: +// SIMD-ONLY0-NEXT: [[TMP930:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP930]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1500]] +// SIMD-ONLY0: if.end1500: +// SIMD-ONLY0-NEXT: [[TMP931:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP931]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP932:%.*]] = load 
i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1501:%.*]] = sext i16 [[TMP932]] to i32 +// SIMD-ONLY0-NEXT: [[TMP933:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1502:%.*]] = sext i16 [[TMP933]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1503:%.*]] = icmp eq i32 [[CONV1501]], [[CONV1502]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1503]], label [[IF_THEN1505:%.*]], label [[IF_END1506:%.*]] +// SIMD-ONLY0: if.then1505: +// SIMD-ONLY0-NEXT: [[TMP934:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP934]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1506]] +// SIMD-ONLY0: if.end1506: +// SIMD-ONLY0-NEXT: [[TMP935:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP935]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP936:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1507:%.*]] = sext i16 [[TMP936]] to i32 +// SIMD-ONLY0-NEXT: [[TMP937:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1508:%.*]] = sext i16 [[TMP937]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1509:%.*]] = icmp eq i32 [[CONV1507]], [[CONV1508]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1509]], label [[IF_THEN1511:%.*]], label [[IF_ELSE1512:%.*]] +// SIMD-ONLY0: if.then1511: +// SIMD-ONLY0-NEXT: [[TMP938:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP938]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1513:%.*]] +// SIMD-ONLY0: if.else1512: +// SIMD-ONLY0-NEXT: [[TMP939:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP939]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1513]] +// SIMD-ONLY0: if.end1513: +// SIMD-ONLY0-NEXT: [[TMP940:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1514:%.*]] = sext i16 [[TMP940]] to i32 +// SIMD-ONLY0-NEXT: [[TMP941:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1515:%.*]] = sext i16 [[TMP941]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1516:%.*]] = icmp eq i32 [[CONV1514]], [[CONV1515]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP1516]], label [[IF_THEN1518:%.*]], label [[IF_ELSE1519:%.*]] +// SIMD-ONLY0: if.then1518: +// SIMD-ONLY0-NEXT: [[TMP942:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP942]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1520:%.*]] +// SIMD-ONLY0: if.else1519: +// SIMD-ONLY0-NEXT: [[TMP943:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP943]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1520]] +// SIMD-ONLY0: if.end1520: +// SIMD-ONLY0-NEXT: [[TMP944:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1521:%.*]] = sext i16 [[TMP944]] to i32 +// SIMD-ONLY0-NEXT: [[TMP945:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1522:%.*]] = sext i16 [[TMP945]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1523:%.*]] = icmp eq i32 [[CONV1521]], [[CONV1522]] +// SIMD-ONLY0-NEXT: [[CONV1524:%.*]] = zext i1 [[CMP1523]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1525:%.*]] = trunc i32 [[CONV1524]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1525]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP946:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1526:%.*]] = icmp ne i16 [[TMP946]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1526]], label [[IF_THEN1527:%.*]], label [[IF_END1528:%.*]] +// SIMD-ONLY0: if.then1527: +// SIMD-ONLY0-NEXT: [[TMP947:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP947]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1528]] +// SIMD-ONLY0: if.end1528: +// SIMD-ONLY0-NEXT: [[TMP948:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1529:%.*]] = sext i16 [[TMP948]] to i32 +// SIMD-ONLY0-NEXT: [[TMP949:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1530:%.*]] = sext i16 [[TMP949]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1531:%.*]] = icmp eq i32 [[CONV1529]], [[CONV1530]] +// SIMD-ONLY0-NEXT: [[CONV1532:%.*]] = zext i1 [[CMP1531]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1533:%.*]] = 
trunc i32 [[CONV1532]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1533]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP950:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1534:%.*]] = icmp ne i16 [[TMP950]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1534]], label [[IF_THEN1535:%.*]], label [[IF_END1536:%.*]] +// SIMD-ONLY0: if.then1535: +// SIMD-ONLY0-NEXT: [[TMP951:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP951]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1536]] +// SIMD-ONLY0: if.end1536: +// SIMD-ONLY0-NEXT: [[TMP952:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1537:%.*]] = sext i16 [[TMP952]] to i32 +// SIMD-ONLY0-NEXT: [[TMP953:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1538:%.*]] = sext i16 [[TMP953]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1539:%.*]] = icmp eq i32 [[CONV1537]], [[CONV1538]] +// SIMD-ONLY0-NEXT: [[CONV1540:%.*]] = zext i1 [[CMP1539]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1541:%.*]] = trunc i32 [[CONV1540]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1541]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP954:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1542:%.*]] = icmp ne i16 [[TMP954]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1542]], label [[IF_THEN1543:%.*]], label [[IF_ELSE1544:%.*]] +// SIMD-ONLY0: if.then1543: +// SIMD-ONLY0-NEXT: [[TMP955:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP955]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1545:%.*]] +// SIMD-ONLY0: if.else1544: +// SIMD-ONLY0-NEXT: [[TMP956:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP956]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1545]] +// SIMD-ONLY0: if.end1545: +// SIMD-ONLY0-NEXT: [[TMP957:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1546:%.*]] = sext i16 [[TMP957]] to i32 +// SIMD-ONLY0-NEXT: [[TMP958:%.*]] = load i16, ptr [[SX]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV1547:%.*]] = sext i16 [[TMP958]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1548:%.*]] = icmp eq i32 [[CONV1546]], [[CONV1547]] +// SIMD-ONLY0-NEXT: [[CONV1549:%.*]] = zext i1 [[CMP1548]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1550:%.*]] = trunc i32 [[CONV1549]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1550]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP959:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1551:%.*]] = icmp ne i16 [[TMP959]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1551]], label [[IF_THEN1552:%.*]], label [[IF_ELSE1553:%.*]] +// SIMD-ONLY0: if.then1552: +// SIMD-ONLY0-NEXT: [[TMP960:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP960]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1554:%.*]] +// SIMD-ONLY0: if.else1553: +// SIMD-ONLY0-NEXT: [[TMP961:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP961]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1554]] +// SIMD-ONLY0: if.end1554: +// SIMD-ONLY0-NEXT: [[TMP962:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP962]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP963:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1555:%.*]] = sext i16 [[TMP963]] to i32 +// SIMD-ONLY0-NEXT: [[TMP964:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1556:%.*]] = sext i16 [[TMP964]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1557:%.*]] = icmp sgt i32 [[CONV1555]], [[CONV1556]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1557]], label [[IF_THEN1559:%.*]], label [[IF_END1560:%.*]] +// SIMD-ONLY0: if.then1559: +// SIMD-ONLY0-NEXT: [[TMP965:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP965]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1560]] +// SIMD-ONLY0: if.end1560: +// SIMD-ONLY0-NEXT: [[TMP966:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP966]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP967:%.*]] = load 
i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1561:%.*]] = sext i16 [[TMP967]] to i32 +// SIMD-ONLY0-NEXT: [[TMP968:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1562:%.*]] = sext i16 [[TMP968]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1563:%.*]] = icmp sgt i32 [[CONV1561]], [[CONV1562]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1563]], label [[IF_THEN1565:%.*]], label [[IF_END1566:%.*]] +// SIMD-ONLY0: if.then1565: +// SIMD-ONLY0-NEXT: [[TMP969:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP969]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1566]] +// SIMD-ONLY0: if.end1566: +// SIMD-ONLY0-NEXT: [[TMP970:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP970]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP971:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1567:%.*]] = sext i16 [[TMP971]] to i32 +// SIMD-ONLY0-NEXT: [[TMP972:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1568:%.*]] = sext i16 [[TMP972]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1569:%.*]] = icmp slt i32 [[CONV1567]], [[CONV1568]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1569]], label [[IF_THEN1571:%.*]], label [[IF_END1572:%.*]] +// SIMD-ONLY0: if.then1571: +// SIMD-ONLY0-NEXT: [[TMP973:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP973]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1572]] +// SIMD-ONLY0: if.end1572: +// SIMD-ONLY0-NEXT: [[TMP974:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP974]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP975:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1573:%.*]] = sext i16 [[TMP975]] to i32 +// SIMD-ONLY0-NEXT: [[TMP976:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1574:%.*]] = sext i16 [[TMP976]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1575:%.*]] = icmp slt i32 [[CONV1573]], [[CONV1574]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1575]], label [[IF_THEN1577:%.*]], label 
[[IF_END1578:%.*]] +// SIMD-ONLY0: if.then1577: +// SIMD-ONLY0-NEXT: [[TMP977:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP977]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1578]] +// SIMD-ONLY0: if.end1578: +// SIMD-ONLY0-NEXT: [[TMP978:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP978]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP979:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1579:%.*]] = sext i16 [[TMP979]] to i32 +// SIMD-ONLY0-NEXT: [[TMP980:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1580:%.*]] = sext i16 [[TMP980]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1581:%.*]] = icmp eq i32 [[CONV1579]], [[CONV1580]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1581]], label [[IF_THEN1583:%.*]], label [[IF_END1584:%.*]] +// SIMD-ONLY0: if.then1583: +// SIMD-ONLY0-NEXT: [[TMP981:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP981]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1584]] +// SIMD-ONLY0: if.end1584: +// SIMD-ONLY0-NEXT: [[TMP982:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP982]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP983:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1585:%.*]] = sext i16 [[TMP983]] to i32 +// SIMD-ONLY0-NEXT: [[TMP984:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1586:%.*]] = sext i16 [[TMP984]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1587:%.*]] = icmp eq i32 [[CONV1585]], [[CONV1586]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1587]], label [[IF_THEN1589:%.*]], label [[IF_END1590:%.*]] +// SIMD-ONLY0: if.then1589: +// SIMD-ONLY0-NEXT: [[TMP985:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP985]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1590]] +// SIMD-ONLY0: if.end1590: +// SIMD-ONLY0-NEXT: [[TMP986:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1591:%.*]] = sext i16 [[TMP986]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP987:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1592:%.*]] = sext i16 [[TMP987]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1593:%.*]] = icmp sgt i32 [[CONV1591]], [[CONV1592]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1593]], label [[IF_THEN1595:%.*]], label [[IF_END1596:%.*]] +// SIMD-ONLY0: if.then1595: +// SIMD-ONLY0-NEXT: [[TMP988:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP988]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1596]] +// SIMD-ONLY0: if.end1596: +// SIMD-ONLY0-NEXT: [[TMP989:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP989]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP990:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1597:%.*]] = sext i16 [[TMP990]] to i32 +// SIMD-ONLY0-NEXT: [[TMP991:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1598:%.*]] = sext i16 [[TMP991]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1599:%.*]] = icmp sgt i32 [[CONV1597]], [[CONV1598]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1599]], label [[IF_THEN1601:%.*]], label [[IF_END1602:%.*]] +// SIMD-ONLY0: if.then1601: +// SIMD-ONLY0-NEXT: [[TMP992:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP992]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1602]] +// SIMD-ONLY0: if.end1602: +// SIMD-ONLY0-NEXT: [[TMP993:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP993]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP994:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1603:%.*]] = sext i16 [[TMP994]] to i32 +// SIMD-ONLY0-NEXT: [[TMP995:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1604:%.*]] = sext i16 [[TMP995]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1605:%.*]] = icmp slt i32 [[CONV1603]], [[CONV1604]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1605]], label [[IF_THEN1607:%.*]], label [[IF_END1608:%.*]] +// SIMD-ONLY0: if.then1607: +// SIMD-ONLY0-NEXT: [[TMP996:%.*]] = load i16, ptr 
[[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP996]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1608]] +// SIMD-ONLY0: if.end1608: +// SIMD-ONLY0-NEXT: [[TMP997:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP997]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP998:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1609:%.*]] = sext i16 [[TMP998]] to i32 +// SIMD-ONLY0-NEXT: [[TMP999:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1610:%.*]] = sext i16 [[TMP999]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1611:%.*]] = icmp slt i32 [[CONV1609]], [[CONV1610]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1611]], label [[IF_THEN1613:%.*]], label [[IF_END1614:%.*]] +// SIMD-ONLY0: if.then1613: +// SIMD-ONLY0-NEXT: [[TMP1000:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1000]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1614]] +// SIMD-ONLY0: if.end1614: +// SIMD-ONLY0-NEXT: [[TMP1001:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1001]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1002:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1615:%.*]] = sext i16 [[TMP1002]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1003:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1616:%.*]] = sext i16 [[TMP1003]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1617:%.*]] = icmp eq i32 [[CONV1615]], [[CONV1616]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1617]], label [[IF_THEN1619:%.*]], label [[IF_END1620:%.*]] +// SIMD-ONLY0: if.then1619: +// SIMD-ONLY0-NEXT: [[TMP1004:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1004]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1620]] +// SIMD-ONLY0: if.end1620: +// SIMD-ONLY0-NEXT: [[TMP1005:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1005]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1006:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1621:%.*]] = sext i16 [[TMP1006]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1007:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1622:%.*]] = sext i16 [[TMP1007]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1623:%.*]] = icmp eq i32 [[CONV1621]], [[CONV1622]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1623]], label [[IF_THEN1625:%.*]], label [[IF_END1626:%.*]] +// SIMD-ONLY0: if.then1625: +// SIMD-ONLY0-NEXT: [[TMP1008:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1008]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1626]] +// SIMD-ONLY0: if.end1626: +// SIMD-ONLY0-NEXT: [[TMP1009:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1009]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1010:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1627:%.*]] = sext i16 [[TMP1010]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1011:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1628:%.*]] = sext i16 [[TMP1011]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1629:%.*]] = icmp eq i32 [[CONV1627]], [[CONV1628]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1629]], label [[IF_THEN1631:%.*]], label [[IF_ELSE1632:%.*]] +// SIMD-ONLY0: if.then1631: +// SIMD-ONLY0-NEXT: [[TMP1012:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1012]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1633:%.*]] +// SIMD-ONLY0: if.else1632: +// SIMD-ONLY0-NEXT: [[TMP1013:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1013]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1633]] +// SIMD-ONLY0: if.end1633: +// SIMD-ONLY0-NEXT: [[TMP1014:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1634:%.*]] = sext i16 [[TMP1014]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1015:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1635:%.*]] = sext i16 [[TMP1015]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1636:%.*]] = icmp eq i32 [[CONV1634]], [[CONV1635]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP1636]], label [[IF_THEN1638:%.*]], label [[IF_ELSE1639:%.*]] +// SIMD-ONLY0: if.then1638: +// SIMD-ONLY0-NEXT: [[TMP1016:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1016]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1640:%.*]] +// SIMD-ONLY0: if.else1639: +// SIMD-ONLY0-NEXT: [[TMP1017:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1017]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1640]] +// SIMD-ONLY0: if.end1640: +// SIMD-ONLY0-NEXT: [[TMP1018:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1641:%.*]] = sext i16 [[TMP1018]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1019:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1642:%.*]] = sext i16 [[TMP1019]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1643:%.*]] = icmp eq i32 [[CONV1641]], [[CONV1642]] +// SIMD-ONLY0-NEXT: [[CONV1644:%.*]] = zext i1 [[CMP1643]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1645:%.*]] = trunc i32 [[CONV1644]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1645]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1020:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1646:%.*]] = icmp ne i16 [[TMP1020]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1646]], label [[IF_THEN1647:%.*]], label [[IF_END1648:%.*]] +// SIMD-ONLY0: if.then1647: +// SIMD-ONLY0-NEXT: [[TMP1021:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1021]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1648]] +// SIMD-ONLY0: if.end1648: +// SIMD-ONLY0-NEXT: [[TMP1022:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1649:%.*]] = sext i16 [[TMP1022]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1023:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1650:%.*]] = sext i16 [[TMP1023]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1651:%.*]] = icmp eq i32 [[CONV1649]], [[CONV1650]] +// SIMD-ONLY0-NEXT: [[CONV1652:%.*]] = zext i1 [[CMP1651]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1653:%.*]] = trunc 
i32 [[CONV1652]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1653]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1024:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1654:%.*]] = icmp ne i16 [[TMP1024]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1654]], label [[IF_THEN1655:%.*]], label [[IF_END1656:%.*]] +// SIMD-ONLY0: if.then1655: +// SIMD-ONLY0-NEXT: [[TMP1025:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1025]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1656]] +// SIMD-ONLY0: if.end1656: +// SIMD-ONLY0-NEXT: [[TMP1026:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1657:%.*]] = sext i16 [[TMP1026]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1027:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1658:%.*]] = sext i16 [[TMP1027]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1659:%.*]] = icmp eq i32 [[CONV1657]], [[CONV1658]] +// SIMD-ONLY0-NEXT: [[CONV1660:%.*]] = zext i1 [[CMP1659]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1661:%.*]] = trunc i32 [[CONV1660]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1661]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1028:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1662:%.*]] = icmp ne i16 [[TMP1028]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1662]], label [[IF_THEN1663:%.*]], label [[IF_ELSE1664:%.*]] +// SIMD-ONLY0: if.then1663: +// SIMD-ONLY0-NEXT: [[TMP1029:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1029]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1665:%.*]] +// SIMD-ONLY0: if.else1664: +// SIMD-ONLY0-NEXT: [[TMP1030:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1030]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1665]] +// SIMD-ONLY0: if.end1665: +// SIMD-ONLY0-NEXT: [[TMP1031:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1666:%.*]] = sext i16 [[TMP1031]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1032:%.*]] = load i16, ptr [[SX]], align 2 
+// SIMD-ONLY0-NEXT: [[CONV1667:%.*]] = sext i16 [[TMP1032]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1668:%.*]] = icmp eq i32 [[CONV1666]], [[CONV1667]] +// SIMD-ONLY0-NEXT: [[CONV1669:%.*]] = zext i1 [[CMP1668]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1670:%.*]] = trunc i32 [[CONV1669]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1670]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1033:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1671:%.*]] = icmp ne i16 [[TMP1033]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1671]], label [[IF_THEN1672:%.*]], label [[IF_ELSE1673:%.*]] +// SIMD-ONLY0: if.then1672: +// SIMD-ONLY0-NEXT: [[TMP1034:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1034]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1674:%.*]] +// SIMD-ONLY0: if.else1673: +// SIMD-ONLY0-NEXT: [[TMP1035:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1035]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1674]] +// SIMD-ONLY0: if.end1674: +// SIMD-ONLY0-NEXT: [[TMP1036:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1036]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1037:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1675:%.*]] = sext i16 [[TMP1037]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1038:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1676:%.*]] = sext i16 [[TMP1038]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1677:%.*]] = icmp sgt i32 [[CONV1675]], [[CONV1676]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1677]], label [[IF_THEN1679:%.*]], label [[IF_END1680:%.*]] +// SIMD-ONLY0: if.then1679: +// SIMD-ONLY0-NEXT: [[TMP1039:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1039]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1680]] +// SIMD-ONLY0: if.end1680: +// SIMD-ONLY0-NEXT: [[TMP1040:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1040]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: 
[[TMP1041:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1681:%.*]] = sext i16 [[TMP1041]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1042:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1682:%.*]] = sext i16 [[TMP1042]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1683:%.*]] = icmp sgt i32 [[CONV1681]], [[CONV1682]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1683]], label [[IF_THEN1685:%.*]], label [[IF_END1686:%.*]] +// SIMD-ONLY0: if.then1685: +// SIMD-ONLY0-NEXT: [[TMP1043:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1043]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1686]] +// SIMD-ONLY0: if.end1686: +// SIMD-ONLY0-NEXT: [[TMP1044:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1044]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1045:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1687:%.*]] = sext i16 [[TMP1045]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1046:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1688:%.*]] = sext i16 [[TMP1046]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1689:%.*]] = icmp slt i32 [[CONV1687]], [[CONV1688]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1689]], label [[IF_THEN1691:%.*]], label [[IF_END1692:%.*]] +// SIMD-ONLY0: if.then1691: +// SIMD-ONLY0-NEXT: [[TMP1047:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1047]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1692]] +// SIMD-ONLY0: if.end1692: +// SIMD-ONLY0-NEXT: [[TMP1048:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1048]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1049:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1693:%.*]] = sext i16 [[TMP1049]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1050:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1694:%.*]] = sext i16 [[TMP1050]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1695:%.*]] = icmp slt i32 [[CONV1693]], [[CONV1694]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1695]], 
label [[IF_THEN1697:%.*]], label [[IF_END1698:%.*]] +// SIMD-ONLY0: if.then1697: +// SIMD-ONLY0-NEXT: [[TMP1051:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1051]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1698]] +// SIMD-ONLY0: if.end1698: +// SIMD-ONLY0-NEXT: [[TMP1052:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1052]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1053:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1699:%.*]] = sext i16 [[TMP1053]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1054:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1700:%.*]] = sext i16 [[TMP1054]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1701:%.*]] = icmp eq i32 [[CONV1699]], [[CONV1700]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1701]], label [[IF_THEN1703:%.*]], label [[IF_END1704:%.*]] +// SIMD-ONLY0: if.then1703: +// SIMD-ONLY0-NEXT: [[TMP1055:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1055]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1704]] +// SIMD-ONLY0: if.end1704: +// SIMD-ONLY0-NEXT: [[TMP1056:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1056]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1057:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1705:%.*]] = sext i16 [[TMP1057]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1058:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1706:%.*]] = sext i16 [[TMP1058]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1707:%.*]] = icmp eq i32 [[CONV1705]], [[CONV1706]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1707]], label [[IF_THEN1709:%.*]], label [[IF_END1710:%.*]] +// SIMD-ONLY0: if.then1709: +// SIMD-ONLY0-NEXT: [[TMP1059:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1059]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1710]] +// SIMD-ONLY0: if.end1710: +// SIMD-ONLY0-NEXT: [[TMP1060:%.*]] = load i16, ptr [[SE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV1711:%.*]] = sext i16 [[TMP1060]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1061:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1712:%.*]] = sext i16 [[TMP1061]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1713:%.*]] = icmp sgt i32 [[CONV1711]], [[CONV1712]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1713]], label [[IF_THEN1715:%.*]], label [[IF_END1716:%.*]] +// SIMD-ONLY0: if.then1715: +// SIMD-ONLY0-NEXT: [[TMP1062:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1062]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1716]] +// SIMD-ONLY0: if.end1716: +// SIMD-ONLY0-NEXT: [[TMP1063:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1063]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1064:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1717:%.*]] = sext i16 [[TMP1064]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1065:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1718:%.*]] = sext i16 [[TMP1065]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1719:%.*]] = icmp sgt i32 [[CONV1717]], [[CONV1718]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1719]], label [[IF_THEN1721:%.*]], label [[IF_END1722:%.*]] +// SIMD-ONLY0: if.then1721: +// SIMD-ONLY0-NEXT: [[TMP1066:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1066]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1722]] +// SIMD-ONLY0: if.end1722: +// SIMD-ONLY0-NEXT: [[TMP1067:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1067]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1068:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1723:%.*]] = sext i16 [[TMP1068]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1069:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1724:%.*]] = sext i16 [[TMP1069]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1725:%.*]] = icmp slt i32 [[CONV1723]], [[CONV1724]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1725]], label [[IF_THEN1727:%.*]], label [[IF_END1728:%.*]] 
+// SIMD-ONLY0: if.then1727: +// SIMD-ONLY0-NEXT: [[TMP1070:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1070]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1728]] +// SIMD-ONLY0: if.end1728: +// SIMD-ONLY0-NEXT: [[TMP1071:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1071]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1072:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1729:%.*]] = sext i16 [[TMP1072]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1073:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1730:%.*]] = sext i16 [[TMP1073]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1731:%.*]] = icmp slt i32 [[CONV1729]], [[CONV1730]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1731]], label [[IF_THEN1733:%.*]], label [[IF_END1734:%.*]] +// SIMD-ONLY0: if.then1733: +// SIMD-ONLY0-NEXT: [[TMP1074:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1074]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1734]] +// SIMD-ONLY0: if.end1734: +// SIMD-ONLY0-NEXT: [[TMP1075:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1075]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1076:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1735:%.*]] = sext i16 [[TMP1076]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1077:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1736:%.*]] = sext i16 [[TMP1077]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1737:%.*]] = icmp eq i32 [[CONV1735]], [[CONV1736]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1737]], label [[IF_THEN1739:%.*]], label [[IF_END1740:%.*]] +// SIMD-ONLY0: if.then1739: +// SIMD-ONLY0-NEXT: [[TMP1078:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1078]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1740]] +// SIMD-ONLY0: if.end1740: +// SIMD-ONLY0-NEXT: [[TMP1079:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1079]], ptr [[SV]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP1080:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1741:%.*]] = sext i16 [[TMP1080]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1081:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1742:%.*]] = sext i16 [[TMP1081]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1743:%.*]] = icmp eq i32 [[CONV1741]], [[CONV1742]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1743]], label [[IF_THEN1745:%.*]], label [[IF_END1746:%.*]] +// SIMD-ONLY0: if.then1745: +// SIMD-ONLY0-NEXT: [[TMP1082:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1082]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1746]] +// SIMD-ONLY0: if.end1746: +// SIMD-ONLY0-NEXT: [[TMP1083:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1083]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1084:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1747:%.*]] = sext i16 [[TMP1084]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1085:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1748:%.*]] = sext i16 [[TMP1085]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1749:%.*]] = icmp eq i32 [[CONV1747]], [[CONV1748]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1749]], label [[IF_THEN1751:%.*]], label [[IF_ELSE1752:%.*]] +// SIMD-ONLY0: if.then1751: +// SIMD-ONLY0-NEXT: [[TMP1086:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1086]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1753:%.*]] +// SIMD-ONLY0: if.else1752: +// SIMD-ONLY0-NEXT: [[TMP1087:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1087]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1753]] +// SIMD-ONLY0: if.end1753: +// SIMD-ONLY0-NEXT: [[TMP1088:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1754:%.*]] = sext i16 [[TMP1088]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1089:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1755:%.*]] = sext i16 [[TMP1089]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1756:%.*]] = icmp eq i32 [[CONV1754]], [[CONV1755]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1756]], label [[IF_THEN1758:%.*]], label [[IF_ELSE1759:%.*]] +// SIMD-ONLY0: if.then1758: +// SIMD-ONLY0-NEXT: [[TMP1090:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1090]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1760:%.*]] +// SIMD-ONLY0: if.else1759: +// SIMD-ONLY0-NEXT: [[TMP1091:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1091]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1760]] +// SIMD-ONLY0: if.end1760: +// SIMD-ONLY0-NEXT: [[TMP1092:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1761:%.*]] = sext i16 [[TMP1092]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1093:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1762:%.*]] = sext i16 [[TMP1093]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1763:%.*]] = icmp eq i32 [[CONV1761]], [[CONV1762]] +// SIMD-ONLY0-NEXT: [[CONV1764:%.*]] = zext i1 [[CMP1763]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1765:%.*]] = trunc i32 [[CONV1764]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1765]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1094:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1766:%.*]] = icmp ne i16 [[TMP1094]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1766]], label [[IF_THEN1767:%.*]], label [[IF_END1768:%.*]] +// SIMD-ONLY0: if.then1767: +// SIMD-ONLY0-NEXT: [[TMP1095:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1095]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1768]] +// SIMD-ONLY0: if.end1768: +// SIMD-ONLY0-NEXT: [[TMP1096:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1769:%.*]] = sext i16 [[TMP1096]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1097:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1770:%.*]] = sext i16 [[TMP1097]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1771:%.*]] = icmp eq i32 [[CONV1769]], [[CONV1770]] +// SIMD-ONLY0-NEXT: 
[[CONV1772:%.*]] = zext i1 [[CMP1771]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1773:%.*]] = trunc i32 [[CONV1772]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1773]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1098:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1774:%.*]] = icmp ne i16 [[TMP1098]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1774]], label [[IF_THEN1775:%.*]], label [[IF_END1776:%.*]] +// SIMD-ONLY0: if.then1775: +// SIMD-ONLY0-NEXT: [[TMP1099:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1099]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1776]] +// SIMD-ONLY0: if.end1776: +// SIMD-ONLY0-NEXT: [[TMP1100:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1777:%.*]] = sext i16 [[TMP1100]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1101:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1778:%.*]] = sext i16 [[TMP1101]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1779:%.*]] = icmp eq i32 [[CONV1777]], [[CONV1778]] +// SIMD-ONLY0-NEXT: [[CONV1780:%.*]] = zext i1 [[CMP1779]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1781:%.*]] = trunc i32 [[CONV1780]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1781]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1102:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1782:%.*]] = icmp ne i16 [[TMP1102]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1782]], label [[IF_THEN1783:%.*]], label [[IF_ELSE1784:%.*]] +// SIMD-ONLY0: if.then1783: +// SIMD-ONLY0-NEXT: [[TMP1103:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1103]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1785:%.*]] +// SIMD-ONLY0: if.else1784: +// SIMD-ONLY0-NEXT: [[TMP1104:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1104]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1785]] +// SIMD-ONLY0: if.end1785: +// SIMD-ONLY0-NEXT: [[TMP1105:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1786:%.*]] = sext 
i16 [[TMP1105]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1106:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1787:%.*]] = sext i16 [[TMP1106]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1788:%.*]] = icmp eq i32 [[CONV1786]], [[CONV1787]] +// SIMD-ONLY0-NEXT: [[CONV1789:%.*]] = zext i1 [[CMP1788]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1790:%.*]] = trunc i32 [[CONV1789]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1790]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1107:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1791:%.*]] = icmp ne i16 [[TMP1107]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1791]], label [[IF_THEN1792:%.*]], label [[IF_ELSE1793:%.*]] +// SIMD-ONLY0: if.then1792: +// SIMD-ONLY0-NEXT: [[TMP1108:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1108]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1794:%.*]] +// SIMD-ONLY0: if.else1793: +// SIMD-ONLY0-NEXT: [[TMP1109:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1109]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1794]] +// SIMD-ONLY0: if.end1794: +// SIMD-ONLY0-NEXT: [[TMP1110:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1110]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1111:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1795:%.*]] = sext i16 [[TMP1111]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1112:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1796:%.*]] = sext i16 [[TMP1112]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1797:%.*]] = icmp sgt i32 [[CONV1795]], [[CONV1796]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1797]], label [[IF_THEN1799:%.*]], label [[IF_END1800:%.*]] +// SIMD-ONLY0: if.then1799: +// SIMD-ONLY0-NEXT: [[TMP1113:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1113]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1800]] +// SIMD-ONLY0: if.end1800: +// SIMD-ONLY0-NEXT: [[TMP1114:%.*]] = load i16, ptr [[SX]], align 
2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1114]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1115:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1801:%.*]] = sext i16 [[TMP1115]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1116:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1802:%.*]] = sext i16 [[TMP1116]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1803:%.*]] = icmp sgt i32 [[CONV1801]], [[CONV1802]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1803]], label [[IF_THEN1805:%.*]], label [[IF_END1806:%.*]] +// SIMD-ONLY0: if.then1805: +// SIMD-ONLY0-NEXT: [[TMP1117:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1117]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1806]] +// SIMD-ONLY0: if.end1806: +// SIMD-ONLY0-NEXT: [[TMP1118:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1118]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1119:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1807:%.*]] = sext i16 [[TMP1119]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1120:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1808:%.*]] = sext i16 [[TMP1120]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1809:%.*]] = icmp slt i32 [[CONV1807]], [[CONV1808]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1809]], label [[IF_THEN1811:%.*]], label [[IF_END1812:%.*]] +// SIMD-ONLY0: if.then1811: +// SIMD-ONLY0-NEXT: [[TMP1121:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1121]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1812]] +// SIMD-ONLY0: if.end1812: +// SIMD-ONLY0-NEXT: [[TMP1122:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1122]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1123:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1813:%.*]] = sext i16 [[TMP1123]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1124:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1814:%.*]] = sext i16 [[TMP1124]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1815:%.*]] = icmp slt i32 [[CONV1813]], [[CONV1814]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1815]], label [[IF_THEN1817:%.*]], label [[IF_END1818:%.*]] +// SIMD-ONLY0: if.then1817: +// SIMD-ONLY0-NEXT: [[TMP1125:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1125]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1818]] +// SIMD-ONLY0: if.end1818: +// SIMD-ONLY0-NEXT: [[TMP1126:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1126]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1127:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1819:%.*]] = sext i16 [[TMP1127]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1128:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1820:%.*]] = sext i16 [[TMP1128]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1821:%.*]] = icmp eq i32 [[CONV1819]], [[CONV1820]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1821]], label [[IF_THEN1823:%.*]], label [[IF_END1824:%.*]] +// SIMD-ONLY0: if.then1823: +// SIMD-ONLY0-NEXT: [[TMP1129:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1129]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1824]] +// SIMD-ONLY0: if.end1824: +// SIMD-ONLY0-NEXT: [[TMP1130:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1130]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1131:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1825:%.*]] = sext i16 [[TMP1131]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1132:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1826:%.*]] = sext i16 [[TMP1132]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1827:%.*]] = icmp eq i32 [[CONV1825]], [[CONV1826]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1827]], label [[IF_THEN1829:%.*]], label [[IF_END1830:%.*]] +// SIMD-ONLY0: if.then1829: +// SIMD-ONLY0-NEXT: [[TMP1133:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1133]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1830]] +// SIMD-ONLY0: 
if.end1830: +// SIMD-ONLY0-NEXT: [[TMP1134:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1831:%.*]] = sext i16 [[TMP1134]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1135:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1832:%.*]] = sext i16 [[TMP1135]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1833:%.*]] = icmp sgt i32 [[CONV1831]], [[CONV1832]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1833]], label [[IF_THEN1835:%.*]], label [[IF_END1836:%.*]] +// SIMD-ONLY0: if.then1835: +// SIMD-ONLY0-NEXT: [[TMP1136:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1136]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1836]] +// SIMD-ONLY0: if.end1836: +// SIMD-ONLY0-NEXT: [[TMP1137:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1137]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1138:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1837:%.*]] = sext i16 [[TMP1138]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1139:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1838:%.*]] = sext i16 [[TMP1139]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1839:%.*]] = icmp sgt i32 [[CONV1837]], [[CONV1838]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1839]], label [[IF_THEN1841:%.*]], label [[IF_END1842:%.*]] +// SIMD-ONLY0: if.then1841: +// SIMD-ONLY0-NEXT: [[TMP1140:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1140]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1842]] +// SIMD-ONLY0: if.end1842: +// SIMD-ONLY0-NEXT: [[TMP1141:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1141]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1142:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1843:%.*]] = sext i16 [[TMP1142]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1143:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1844:%.*]] = sext i16 [[TMP1143]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1845:%.*]] = icmp slt i32 [[CONV1843]], [[CONV1844]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP1845]], label [[IF_THEN1847:%.*]], label [[IF_END1848:%.*]] +// SIMD-ONLY0: if.then1847: +// SIMD-ONLY0-NEXT: [[TMP1144:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1144]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1848]] +// SIMD-ONLY0: if.end1848: +// SIMD-ONLY0-NEXT: [[TMP1145:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1145]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1146:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1849:%.*]] = sext i16 [[TMP1146]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1147:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1850:%.*]] = sext i16 [[TMP1147]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1851:%.*]] = icmp slt i32 [[CONV1849]], [[CONV1850]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1851]], label [[IF_THEN1853:%.*]], label [[IF_END1854:%.*]] +// SIMD-ONLY0: if.then1853: +// SIMD-ONLY0-NEXT: [[TMP1148:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1148]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1854]] +// SIMD-ONLY0: if.end1854: +// SIMD-ONLY0-NEXT: [[TMP1149:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1149]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1150:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1855:%.*]] = sext i16 [[TMP1150]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1151:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1856:%.*]] = sext i16 [[TMP1151]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1857:%.*]] = icmp eq i32 [[CONV1855]], [[CONV1856]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1857]], label [[IF_THEN1859:%.*]], label [[IF_END1860:%.*]] +// SIMD-ONLY0: if.then1859: +// SIMD-ONLY0-NEXT: [[TMP1152:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1152]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1860]] +// SIMD-ONLY0: if.end1860: +// SIMD-ONLY0-NEXT: [[TMP1153:%.*]] = load i16, 
ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1153]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1154:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1861:%.*]] = sext i16 [[TMP1154]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1155:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1862:%.*]] = sext i16 [[TMP1155]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1863:%.*]] = icmp eq i32 [[CONV1861]], [[CONV1862]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1863]], label [[IF_THEN1865:%.*]], label [[IF_END1866:%.*]] +// SIMD-ONLY0: if.then1865: +// SIMD-ONLY0-NEXT: [[TMP1156:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1156]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1866]] +// SIMD-ONLY0: if.end1866: +// SIMD-ONLY0-NEXT: [[TMP1157:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1157]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1158:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1867:%.*]] = sext i16 [[TMP1158]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1159:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1868:%.*]] = sext i16 [[TMP1159]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1869:%.*]] = icmp eq i32 [[CONV1867]], [[CONV1868]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1869]], label [[IF_THEN1871:%.*]], label [[IF_ELSE1872:%.*]] +// SIMD-ONLY0: if.then1871: +// SIMD-ONLY0-NEXT: [[TMP1160:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1160]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1873:%.*]] +// SIMD-ONLY0: if.else1872: +// SIMD-ONLY0-NEXT: [[TMP1161:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1161]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1873]] +// SIMD-ONLY0: if.end1873: +// SIMD-ONLY0-NEXT: [[TMP1162:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1874:%.*]] = sext i16 [[TMP1162]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1163:%.*]] = load i16, ptr [[SX]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV1875:%.*]] = sext i16 [[TMP1163]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1876:%.*]] = icmp eq i32 [[CONV1874]], [[CONV1875]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1876]], label [[IF_THEN1878:%.*]], label [[IF_ELSE1879:%.*]] +// SIMD-ONLY0: if.then1878: +// SIMD-ONLY0-NEXT: [[TMP1164:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1164]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1880:%.*]] +// SIMD-ONLY0: if.else1879: +// SIMD-ONLY0-NEXT: [[TMP1165:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1165]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1880]] +// SIMD-ONLY0: if.end1880: +// SIMD-ONLY0-NEXT: [[TMP1166:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1881:%.*]] = sext i16 [[TMP1166]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1167:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1882:%.*]] = sext i16 [[TMP1167]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1883:%.*]] = icmp eq i32 [[CONV1881]], [[CONV1882]] +// SIMD-ONLY0-NEXT: [[CONV1884:%.*]] = zext i1 [[CMP1883]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1885:%.*]] = trunc i32 [[CONV1884]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1885]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1168:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1886:%.*]] = icmp ne i16 [[TMP1168]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1886]], label [[IF_THEN1887:%.*]], label [[IF_END1888:%.*]] +// SIMD-ONLY0: if.then1887: +// SIMD-ONLY0-NEXT: [[TMP1169:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1169]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1888]] +// SIMD-ONLY0: if.end1888: +// SIMD-ONLY0-NEXT: [[TMP1170:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1889:%.*]] = sext i16 [[TMP1170]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1171:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1890:%.*]] = sext i16 [[TMP1171]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP1891:%.*]] = icmp eq i32 [[CONV1889]], [[CONV1890]] +// SIMD-ONLY0-NEXT: [[CONV1892:%.*]] = zext i1 [[CMP1891]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1893:%.*]] = trunc i32 [[CONV1892]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1893]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1172:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1894:%.*]] = icmp ne i16 [[TMP1172]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1894]], label [[IF_THEN1895:%.*]], label [[IF_END1896:%.*]] +// SIMD-ONLY0: if.then1895: +// SIMD-ONLY0-NEXT: [[TMP1173:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1173]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1896]] +// SIMD-ONLY0: if.end1896: +// SIMD-ONLY0-NEXT: [[TMP1174:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1897:%.*]] = sext i16 [[TMP1174]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1175:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1898:%.*]] = sext i16 [[TMP1175]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1899:%.*]] = icmp eq i32 [[CONV1897]], [[CONV1898]] +// SIMD-ONLY0-NEXT: [[CONV1900:%.*]] = zext i1 [[CMP1899]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1901:%.*]] = trunc i32 [[CONV1900]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1901]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1176:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1902:%.*]] = icmp ne i16 [[TMP1176]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1902]], label [[IF_THEN1903:%.*]], label [[IF_ELSE1904:%.*]] +// SIMD-ONLY0: if.then1903: +// SIMD-ONLY0-NEXT: [[TMP1177:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1177]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1905:%.*]] +// SIMD-ONLY0: if.else1904: +// SIMD-ONLY0-NEXT: [[TMP1178:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1178]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1905]] +// SIMD-ONLY0: if.end1905: +// SIMD-ONLY0-NEXT: [[TMP1179:%.*]] = 
load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1906:%.*]] = sext i16 [[TMP1179]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1180:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1907:%.*]] = sext i16 [[TMP1180]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1908:%.*]] = icmp eq i32 [[CONV1906]], [[CONV1907]] +// SIMD-ONLY0-NEXT: [[CONV1909:%.*]] = zext i1 [[CMP1908]] to i32 +// SIMD-ONLY0-NEXT: [[CONV1910:%.*]] = trunc i32 [[CONV1909]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV1910]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1181:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL1911:%.*]] = icmp ne i16 [[TMP1181]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL1911]], label [[IF_THEN1912:%.*]], label [[IF_ELSE1913:%.*]] +// SIMD-ONLY0: if.then1912: +// SIMD-ONLY0-NEXT: [[TMP1182:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1182]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1914:%.*]] +// SIMD-ONLY0: if.else1913: +// SIMD-ONLY0-NEXT: [[TMP1183:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1183]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1914]] +// SIMD-ONLY0: if.end1914: +// SIMD-ONLY0-NEXT: [[TMP1184:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1184]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1185:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1915:%.*]] = sext i16 [[TMP1185]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1186:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1916:%.*]] = sext i16 [[TMP1186]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1917:%.*]] = icmp sgt i32 [[CONV1915]], [[CONV1916]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1917]], label [[IF_THEN1919:%.*]], label [[IF_END1920:%.*]] +// SIMD-ONLY0: if.then1919: +// SIMD-ONLY0-NEXT: [[TMP1187:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1187]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1920]] +// SIMD-ONLY0: 
if.end1920: +// SIMD-ONLY0-NEXT: [[TMP1188:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1188]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1189:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1921:%.*]] = sext i16 [[TMP1189]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1190:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1922:%.*]] = sext i16 [[TMP1190]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1923:%.*]] = icmp sgt i32 [[CONV1921]], [[CONV1922]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1923]], label [[IF_THEN1925:%.*]], label [[IF_END1926:%.*]] +// SIMD-ONLY0: if.then1925: +// SIMD-ONLY0-NEXT: [[TMP1191:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1191]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1926]] +// SIMD-ONLY0: if.end1926: +// SIMD-ONLY0-NEXT: [[TMP1192:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1192]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1193:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1927:%.*]] = sext i16 [[TMP1193]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1194:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1928:%.*]] = sext i16 [[TMP1194]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1929:%.*]] = icmp slt i32 [[CONV1927]], [[CONV1928]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1929]], label [[IF_THEN1931:%.*]], label [[IF_END1932:%.*]] +// SIMD-ONLY0: if.then1931: +// SIMD-ONLY0-NEXT: [[TMP1195:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1195]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1932]] +// SIMD-ONLY0: if.end1932: +// SIMD-ONLY0-NEXT: [[TMP1196:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1196]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1197:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1933:%.*]] = sext i16 [[TMP1197]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1198:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV1934:%.*]] = sext i16 [[TMP1198]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1935:%.*]] = icmp slt i32 [[CONV1933]], [[CONV1934]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1935]], label [[IF_THEN1937:%.*]], label [[IF_END1938:%.*]] +// SIMD-ONLY0: if.then1937: +// SIMD-ONLY0-NEXT: [[TMP1199:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1199]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1938]] +// SIMD-ONLY0: if.end1938: +// SIMD-ONLY0-NEXT: [[TMP1200:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1200]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1201:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1939:%.*]] = sext i16 [[TMP1201]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1202:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1940:%.*]] = sext i16 [[TMP1202]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1941:%.*]] = icmp eq i32 [[CONV1939]], [[CONV1940]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1941]], label [[IF_THEN1943:%.*]], label [[IF_END1944:%.*]] +// SIMD-ONLY0: if.then1943: +// SIMD-ONLY0-NEXT: [[TMP1203:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1203]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1944]] +// SIMD-ONLY0: if.end1944: +// SIMD-ONLY0-NEXT: [[TMP1204:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1204]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1205:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1945:%.*]] = sext i16 [[TMP1205]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1206:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1946:%.*]] = sext i16 [[TMP1206]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1947:%.*]] = icmp eq i32 [[CONV1945]], [[CONV1946]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1947]], label [[IF_THEN1949:%.*]], label [[IF_END1950:%.*]] +// SIMD-ONLY0: if.then1949: +// SIMD-ONLY0-NEXT: [[TMP1207:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1207]], ptr [[SX]], 
align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1950]] +// SIMD-ONLY0: if.end1950: +// SIMD-ONLY0-NEXT: [[TMP1208:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1951:%.*]] = sext i16 [[TMP1208]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1209:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1952:%.*]] = sext i16 [[TMP1209]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1953:%.*]] = icmp sgt i32 [[CONV1951]], [[CONV1952]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1953]], label [[IF_THEN1955:%.*]], label [[IF_END1956:%.*]] +// SIMD-ONLY0: if.then1955: +// SIMD-ONLY0-NEXT: [[TMP1210:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1210]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1956]] +// SIMD-ONLY0: if.end1956: +// SIMD-ONLY0-NEXT: [[TMP1211:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1211]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1212:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1957:%.*]] = sext i16 [[TMP1212]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1213:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1958:%.*]] = sext i16 [[TMP1213]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1959:%.*]] = icmp sgt i32 [[CONV1957]], [[CONV1958]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1959]], label [[IF_THEN1961:%.*]], label [[IF_END1962:%.*]] +// SIMD-ONLY0: if.then1961: +// SIMD-ONLY0-NEXT: [[TMP1214:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1214]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1962]] +// SIMD-ONLY0: if.end1962: +// SIMD-ONLY0-NEXT: [[TMP1215:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1215]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1216:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1963:%.*]] = sext i16 [[TMP1216]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1217:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1964:%.*]] = sext i16 [[TMP1217]] to i32 +// 
SIMD-ONLY0-NEXT: [[CMP1965:%.*]] = icmp slt i32 [[CONV1963]], [[CONV1964]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1965]], label [[IF_THEN1967:%.*]], label [[IF_END1968:%.*]] +// SIMD-ONLY0: if.then1967: +// SIMD-ONLY0-NEXT: [[TMP1218:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1218]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1968]] +// SIMD-ONLY0: if.end1968: +// SIMD-ONLY0-NEXT: [[TMP1219:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1219]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1220:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1969:%.*]] = sext i16 [[TMP1220]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1221:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1970:%.*]] = sext i16 [[TMP1221]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1971:%.*]] = icmp slt i32 [[CONV1969]], [[CONV1970]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1971]], label [[IF_THEN1973:%.*]], label [[IF_END1974:%.*]] +// SIMD-ONLY0: if.then1973: +// SIMD-ONLY0-NEXT: [[TMP1222:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1222]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1974]] +// SIMD-ONLY0: if.end1974: +// SIMD-ONLY0-NEXT: [[TMP1223:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1223]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1224:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1975:%.*]] = sext i16 [[TMP1224]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1225:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1976:%.*]] = sext i16 [[TMP1225]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1977:%.*]] = icmp eq i32 [[CONV1975]], [[CONV1976]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1977]], label [[IF_THEN1979:%.*]], label [[IF_END1980:%.*]] +// SIMD-ONLY0: if.then1979: +// SIMD-ONLY0-NEXT: [[TMP1226:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1226]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1980]] 
+// SIMD-ONLY0: if.end1980: +// SIMD-ONLY0-NEXT: [[TMP1227:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1227]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1228:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1981:%.*]] = sext i16 [[TMP1228]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1229:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1982:%.*]] = sext i16 [[TMP1229]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1983:%.*]] = icmp eq i32 [[CONV1981]], [[CONV1982]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1983]], label [[IF_THEN1985:%.*]], label [[IF_END1986:%.*]] +// SIMD-ONLY0: if.then1985: +// SIMD-ONLY0-NEXT: [[TMP1230:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1230]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1986]] +// SIMD-ONLY0: if.end1986: +// SIMD-ONLY0-NEXT: [[TMP1231:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1231]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1232:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1987:%.*]] = sext i16 [[TMP1232]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1233:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1988:%.*]] = sext i16 [[TMP1233]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1989:%.*]] = icmp eq i32 [[CONV1987]], [[CONV1988]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1989]], label [[IF_THEN1991:%.*]], label [[IF_ELSE1992:%.*]] +// SIMD-ONLY0: if.then1991: +// SIMD-ONLY0-NEXT: [[TMP1234:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1234]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1993:%.*]] +// SIMD-ONLY0: if.else1992: +// SIMD-ONLY0-NEXT: [[TMP1235:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1235]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END1993]] +// SIMD-ONLY0: if.end1993: +// SIMD-ONLY0-NEXT: [[TMP1236:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1994:%.*]] = sext i16 [[TMP1236]] to 
i32 +// SIMD-ONLY0-NEXT: [[TMP1237:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1995:%.*]] = sext i16 [[TMP1237]] to i32 +// SIMD-ONLY0-NEXT: [[CMP1996:%.*]] = icmp eq i32 [[CONV1994]], [[CONV1995]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1996]], label [[IF_THEN1998:%.*]], label [[IF_ELSE1999:%.*]] +// SIMD-ONLY0: if.then1998: +// SIMD-ONLY0-NEXT: [[TMP1238:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1238]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2000:%.*]] +// SIMD-ONLY0: if.else1999: +// SIMD-ONLY0-NEXT: [[TMP1239:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1239]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2000]] +// SIMD-ONLY0: if.end2000: +// SIMD-ONLY0-NEXT: [[TMP1240:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2001:%.*]] = sext i16 [[TMP1240]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1241:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2002:%.*]] = sext i16 [[TMP1241]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2003:%.*]] = icmp eq i32 [[CONV2001]], [[CONV2002]] +// SIMD-ONLY0-NEXT: [[CONV2004:%.*]] = zext i1 [[CMP2003]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2005:%.*]] = trunc i32 [[CONV2004]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2005]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1242:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2006:%.*]] = icmp ne i16 [[TMP1242]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2006]], label [[IF_THEN2007:%.*]], label [[IF_END2008:%.*]] +// SIMD-ONLY0: if.then2007: +// SIMD-ONLY0-NEXT: [[TMP1243:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1243]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2008]] +// SIMD-ONLY0: if.end2008: +// SIMD-ONLY0-NEXT: [[TMP1244:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2009:%.*]] = sext i16 [[TMP1244]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1245:%.*]] = load i16, ptr [[SX]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV2010:%.*]] = sext i16 [[TMP1245]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2011:%.*]] = icmp eq i32 [[CONV2009]], [[CONV2010]] +// SIMD-ONLY0-NEXT: [[CONV2012:%.*]] = zext i1 [[CMP2011]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2013:%.*]] = trunc i32 [[CONV2012]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2013]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1246:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2014:%.*]] = icmp ne i16 [[TMP1246]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2014]], label [[IF_THEN2015:%.*]], label [[IF_END2016:%.*]] +// SIMD-ONLY0: if.then2015: +// SIMD-ONLY0-NEXT: [[TMP1247:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1247]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2016]] +// SIMD-ONLY0: if.end2016: +// SIMD-ONLY0-NEXT: [[TMP1248:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2017:%.*]] = sext i16 [[TMP1248]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1249:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2018:%.*]] = sext i16 [[TMP1249]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2019:%.*]] = icmp eq i32 [[CONV2017]], [[CONV2018]] +// SIMD-ONLY0-NEXT: [[CONV2020:%.*]] = zext i1 [[CMP2019]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2021:%.*]] = trunc i32 [[CONV2020]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2021]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1250:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2022:%.*]] = icmp ne i16 [[TMP1250]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2022]], label [[IF_THEN2023:%.*]], label [[IF_ELSE2024:%.*]] +// SIMD-ONLY0: if.then2023: +// SIMD-ONLY0-NEXT: [[TMP1251:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1251]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2025:%.*]] +// SIMD-ONLY0: if.else2024: +// SIMD-ONLY0-NEXT: [[TMP1252:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1252]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br 
label [[IF_END2025]] +// SIMD-ONLY0: if.end2025: +// SIMD-ONLY0-NEXT: [[TMP1253:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2026:%.*]] = sext i16 [[TMP1253]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1254:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2027:%.*]] = sext i16 [[TMP1254]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2028:%.*]] = icmp eq i32 [[CONV2026]], [[CONV2027]] +// SIMD-ONLY0-NEXT: [[CONV2029:%.*]] = zext i1 [[CMP2028]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2030:%.*]] = trunc i32 [[CONV2029]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2030]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1255:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2031:%.*]] = icmp ne i16 [[TMP1255]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2031]], label [[IF_THEN2032:%.*]], label [[IF_ELSE2033:%.*]] +// SIMD-ONLY0: if.then2032: +// SIMD-ONLY0-NEXT: [[TMP1256:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1256]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2034:%.*]] +// SIMD-ONLY0: if.else2033: +// SIMD-ONLY0-NEXT: [[TMP1257:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1257]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2034]] +// SIMD-ONLY0: if.end2034: +// SIMD-ONLY0-NEXT: [[TMP1258:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1258]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1259:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2035:%.*]] = sext i16 [[TMP1259]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1260:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2036:%.*]] = sext i16 [[TMP1260]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2037:%.*]] = icmp sgt i32 [[CONV2035]], [[CONV2036]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2037]], label [[IF_THEN2039:%.*]], label [[IF_END2040:%.*]] +// SIMD-ONLY0: if.then2039: +// SIMD-ONLY0-NEXT: [[TMP1261:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP1261]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2040]] +// SIMD-ONLY0: if.end2040: +// SIMD-ONLY0-NEXT: [[TMP1262:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1262]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1263:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2041:%.*]] = sext i16 [[TMP1263]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1264:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2042:%.*]] = sext i16 [[TMP1264]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2043:%.*]] = icmp sgt i32 [[CONV2041]], [[CONV2042]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2043]], label [[IF_THEN2045:%.*]], label [[IF_END2046:%.*]] +// SIMD-ONLY0: if.then2045: +// SIMD-ONLY0-NEXT: [[TMP1265:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1265]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2046]] +// SIMD-ONLY0: if.end2046: +// SIMD-ONLY0-NEXT: [[TMP1266:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1266]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1267:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2047:%.*]] = sext i16 [[TMP1267]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1268:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2048:%.*]] = sext i16 [[TMP1268]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2049:%.*]] = icmp slt i32 [[CONV2047]], [[CONV2048]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2049]], label [[IF_THEN2051:%.*]], label [[IF_END2052:%.*]] +// SIMD-ONLY0: if.then2051: +// SIMD-ONLY0-NEXT: [[TMP1269:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1269]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2052]] +// SIMD-ONLY0: if.end2052: +// SIMD-ONLY0-NEXT: [[TMP1270:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1270]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1271:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2053:%.*]] = sext i16 [[TMP1271]] to 
i32 +// SIMD-ONLY0-NEXT: [[TMP1272:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2054:%.*]] = sext i16 [[TMP1272]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2055:%.*]] = icmp slt i32 [[CONV2053]], [[CONV2054]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2055]], label [[IF_THEN2057:%.*]], label [[IF_END2058:%.*]] +// SIMD-ONLY0: if.then2057: +// SIMD-ONLY0-NEXT: [[TMP1273:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1273]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2058]] +// SIMD-ONLY0: if.end2058: +// SIMD-ONLY0-NEXT: [[TMP1274:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1274]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1275:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2059:%.*]] = sext i16 [[TMP1275]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1276:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2060:%.*]] = sext i16 [[TMP1276]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2061:%.*]] = icmp eq i32 [[CONV2059]], [[CONV2060]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2061]], label [[IF_THEN2063:%.*]], label [[IF_END2064:%.*]] +// SIMD-ONLY0: if.then2063: +// SIMD-ONLY0-NEXT: [[TMP1277:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1277]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2064]] +// SIMD-ONLY0: if.end2064: +// SIMD-ONLY0-NEXT: [[TMP1278:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1278]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1279:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2065:%.*]] = sext i16 [[TMP1279]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1280:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2066:%.*]] = sext i16 [[TMP1280]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2067:%.*]] = icmp eq i32 [[CONV2065]], [[CONV2066]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2067]], label [[IF_THEN2069:%.*]], label [[IF_END2070:%.*]] +// SIMD-ONLY0: if.then2069: +// SIMD-ONLY0-NEXT: 
[[TMP1281:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1281]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2070]] +// SIMD-ONLY0: if.end2070: +// SIMD-ONLY0-NEXT: [[TMP1282:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2071:%.*]] = sext i16 [[TMP1282]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1283:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2072:%.*]] = sext i16 [[TMP1283]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2073:%.*]] = icmp sgt i32 [[CONV2071]], [[CONV2072]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2073]], label [[IF_THEN2075:%.*]], label [[IF_END2076:%.*]] +// SIMD-ONLY0: if.then2075: +// SIMD-ONLY0-NEXT: [[TMP1284:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1284]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2076]] +// SIMD-ONLY0: if.end2076: +// SIMD-ONLY0-NEXT: [[TMP1285:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1285]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1286:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2077:%.*]] = sext i16 [[TMP1286]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1287:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2078:%.*]] = sext i16 [[TMP1287]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2079:%.*]] = icmp sgt i32 [[CONV2077]], [[CONV2078]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2079]], label [[IF_THEN2081:%.*]], label [[IF_END2082:%.*]] +// SIMD-ONLY0: if.then2081: +// SIMD-ONLY0-NEXT: [[TMP1288:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1288]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2082]] +// SIMD-ONLY0: if.end2082: +// SIMD-ONLY0-NEXT: [[TMP1289:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1289]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1290:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2083:%.*]] = sext i16 [[TMP1290]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1291:%.*]] = load i16, 
ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2084:%.*]] = sext i16 [[TMP1291]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2085:%.*]] = icmp slt i32 [[CONV2083]], [[CONV2084]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2085]], label [[IF_THEN2087:%.*]], label [[IF_END2088:%.*]] +// SIMD-ONLY0: if.then2087: +// SIMD-ONLY0-NEXT: [[TMP1292:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1292]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2088]] +// SIMD-ONLY0: if.end2088: +// SIMD-ONLY0-NEXT: [[TMP1293:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1293]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1294:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2089:%.*]] = sext i16 [[TMP1294]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1295:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2090:%.*]] = sext i16 [[TMP1295]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2091:%.*]] = icmp slt i32 [[CONV2089]], [[CONV2090]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2091]], label [[IF_THEN2093:%.*]], label [[IF_END2094:%.*]] +// SIMD-ONLY0: if.then2093: +// SIMD-ONLY0-NEXT: [[TMP1296:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1296]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2094]] +// SIMD-ONLY0: if.end2094: +// SIMD-ONLY0-NEXT: [[TMP1297:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1297]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1298:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2095:%.*]] = sext i16 [[TMP1298]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1299:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2096:%.*]] = sext i16 [[TMP1299]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2097:%.*]] = icmp eq i32 [[CONV2095]], [[CONV2096]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2097]], label [[IF_THEN2099:%.*]], label [[IF_END2100:%.*]] +// SIMD-ONLY0: if.then2099: +// SIMD-ONLY0-NEXT: [[TMP1300:%.*]] = load i16, ptr [[SD]], align 2 +// 
SIMD-ONLY0-NEXT: store i16 [[TMP1300]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2100]] +// SIMD-ONLY0: if.end2100: +// SIMD-ONLY0-NEXT: [[TMP1301:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1301]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1302:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2101:%.*]] = sext i16 [[TMP1302]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1303:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2102:%.*]] = sext i16 [[TMP1303]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2103:%.*]] = icmp eq i32 [[CONV2101]], [[CONV2102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2103]], label [[IF_THEN2105:%.*]], label [[IF_END2106:%.*]] +// SIMD-ONLY0: if.then2105: +// SIMD-ONLY0-NEXT: [[TMP1304:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1304]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2106]] +// SIMD-ONLY0: if.end2106: +// SIMD-ONLY0-NEXT: [[TMP1305:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1305]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1306:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2107:%.*]] = sext i16 [[TMP1306]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1307:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2108:%.*]] = sext i16 [[TMP1307]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2109:%.*]] = icmp eq i32 [[CONV2107]], [[CONV2108]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2109]], label [[IF_THEN2111:%.*]], label [[IF_ELSE2112:%.*]] +// SIMD-ONLY0: if.then2111: +// SIMD-ONLY0-NEXT: [[TMP1308:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1308]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2113:%.*]] +// SIMD-ONLY0: if.else2112: +// SIMD-ONLY0-NEXT: [[TMP1309:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1309]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2113]] +// SIMD-ONLY0: if.end2113: +// SIMD-ONLY0-NEXT: 
[[TMP1310:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2114:%.*]] = sext i16 [[TMP1310]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1311:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2115:%.*]] = sext i16 [[TMP1311]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2116:%.*]] = icmp eq i32 [[CONV2114]], [[CONV2115]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2116]], label [[IF_THEN2118:%.*]], label [[IF_ELSE2119:%.*]] +// SIMD-ONLY0: if.then2118: +// SIMD-ONLY0-NEXT: [[TMP1312:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1312]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2120:%.*]] +// SIMD-ONLY0: if.else2119: +// SIMD-ONLY0-NEXT: [[TMP1313:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1313]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2120]] +// SIMD-ONLY0: if.end2120: +// SIMD-ONLY0-NEXT: [[TMP1314:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2121:%.*]] = sext i16 [[TMP1314]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1315:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2122:%.*]] = sext i16 [[TMP1315]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2123:%.*]] = icmp eq i32 [[CONV2121]], [[CONV2122]] +// SIMD-ONLY0-NEXT: [[CONV2124:%.*]] = zext i1 [[CMP2123]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2125:%.*]] = trunc i32 [[CONV2124]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2125]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1316:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2126:%.*]] = icmp ne i16 [[TMP1316]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2126]], label [[IF_THEN2127:%.*]], label [[IF_END2128:%.*]] +// SIMD-ONLY0: if.then2127: +// SIMD-ONLY0-NEXT: [[TMP1317:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1317]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2128]] +// SIMD-ONLY0: if.end2128: +// SIMD-ONLY0-NEXT: [[TMP1318:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2129:%.*]] 
= sext i16 [[TMP1318]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1319:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2130:%.*]] = sext i16 [[TMP1319]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2131:%.*]] = icmp eq i32 [[CONV2129]], [[CONV2130]] +// SIMD-ONLY0-NEXT: [[CONV2132:%.*]] = zext i1 [[CMP2131]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2133:%.*]] = trunc i32 [[CONV2132]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2133]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1320:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2134:%.*]] = icmp ne i16 [[TMP1320]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2134]], label [[IF_THEN2135:%.*]], label [[IF_END2136:%.*]] +// SIMD-ONLY0: if.then2135: +// SIMD-ONLY0-NEXT: [[TMP1321:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1321]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2136]] +// SIMD-ONLY0: if.end2136: +// SIMD-ONLY0-NEXT: [[TMP1322:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2137:%.*]] = sext i16 [[TMP1322]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1323:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2138:%.*]] = sext i16 [[TMP1323]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2139:%.*]] = icmp eq i32 [[CONV2137]], [[CONV2138]] +// SIMD-ONLY0-NEXT: [[CONV2140:%.*]] = zext i1 [[CMP2139]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2141:%.*]] = trunc i32 [[CONV2140]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2141]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1324:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2142:%.*]] = icmp ne i16 [[TMP1324]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2142]], label [[IF_THEN2143:%.*]], label [[IF_ELSE2144:%.*]] +// SIMD-ONLY0: if.then2143: +// SIMD-ONLY0-NEXT: [[TMP1325:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1325]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2145:%.*]] +// SIMD-ONLY0: if.else2144: +// SIMD-ONLY0-NEXT: [[TMP1326:%.*]] = load i16, ptr 
[[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1326]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2145]] +// SIMD-ONLY0: if.end2145: +// SIMD-ONLY0-NEXT: [[TMP1327:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2146:%.*]] = sext i16 [[TMP1327]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1328:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2147:%.*]] = sext i16 [[TMP1328]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2148:%.*]] = icmp eq i32 [[CONV2146]], [[CONV2147]] +// SIMD-ONLY0-NEXT: [[CONV2149:%.*]] = zext i1 [[CMP2148]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2150:%.*]] = trunc i32 [[CONV2149]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2150]], ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1329:%.*]] = load i16, ptr [[SR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2151:%.*]] = icmp ne i16 [[TMP1329]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2151]], label [[IF_THEN2152:%.*]], label [[IF_ELSE2153:%.*]] +// SIMD-ONLY0: if.then2152: +// SIMD-ONLY0-NEXT: [[TMP1330:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1330]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2154:%.*]] +// SIMD-ONLY0: if.else2153: +// SIMD-ONLY0-NEXT: [[TMP1331:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1331]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2154]] +// SIMD-ONLY0: if.end2154: +// SIMD-ONLY0-NEXT: [[TMP1332:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1332]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1333:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2155:%.*]] = zext i16 [[TMP1333]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1334:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2156:%.*]] = zext i16 [[TMP1334]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2157:%.*]] = icmp sgt i32 [[CONV2155]], [[CONV2156]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2157]], label [[IF_THEN2159:%.*]], label [[IF_END2160:%.*]] +// SIMD-ONLY0: if.then2159: 
+// SIMD-ONLY0-NEXT: [[TMP1335:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1335]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2160]] +// SIMD-ONLY0: if.end2160: +// SIMD-ONLY0-NEXT: [[TMP1336:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1336]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1337:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2161:%.*]] = zext i16 [[TMP1337]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1338:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2162:%.*]] = zext i16 [[TMP1338]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2163:%.*]] = icmp sgt i32 [[CONV2161]], [[CONV2162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2163]], label [[IF_THEN2165:%.*]], label [[IF_END2166:%.*]] +// SIMD-ONLY0: if.then2165: +// SIMD-ONLY0-NEXT: [[TMP1339:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1339]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2166]] +// SIMD-ONLY0: if.end2166: +// SIMD-ONLY0-NEXT: [[TMP1340:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1340]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1341:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2167:%.*]] = zext i16 [[TMP1341]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1342:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2168:%.*]] = zext i16 [[TMP1342]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2169:%.*]] = icmp slt i32 [[CONV2167]], [[CONV2168]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2169]], label [[IF_THEN2171:%.*]], label [[IF_END2172:%.*]] +// SIMD-ONLY0: if.then2171: +// SIMD-ONLY0-NEXT: [[TMP1343:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1343]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2172]] +// SIMD-ONLY0: if.end2172: +// SIMD-ONLY0-NEXT: [[TMP1344:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1344]], ptr [[USV]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP1345:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2173:%.*]] = zext i16 [[TMP1345]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1346:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2174:%.*]] = zext i16 [[TMP1346]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2175:%.*]] = icmp slt i32 [[CONV2173]], [[CONV2174]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2175]], label [[IF_THEN2177:%.*]], label [[IF_END2178:%.*]] +// SIMD-ONLY0: if.then2177: +// SIMD-ONLY0-NEXT: [[TMP1347:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1347]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2178]] +// SIMD-ONLY0: if.end2178: +// SIMD-ONLY0-NEXT: [[TMP1348:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1348]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1349:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2179:%.*]] = zext i16 [[TMP1349]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1350:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2180:%.*]] = zext i16 [[TMP1350]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2181:%.*]] = icmp eq i32 [[CONV2179]], [[CONV2180]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2181]], label [[IF_THEN2183:%.*]], label [[IF_END2184:%.*]] +// SIMD-ONLY0: if.then2183: +// SIMD-ONLY0-NEXT: [[TMP1351:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1351]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2184]] +// SIMD-ONLY0: if.end2184: +// SIMD-ONLY0-NEXT: [[TMP1352:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1352]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1353:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2185:%.*]] = zext i16 [[TMP1353]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1354:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2186:%.*]] = zext i16 [[TMP1354]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2187:%.*]] = icmp eq i32 [[CONV2185]], [[CONV2186]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP2187]], label [[IF_THEN2189:%.*]], label [[IF_END2190:%.*]] +// SIMD-ONLY0: if.then2189: +// SIMD-ONLY0-NEXT: [[TMP1355:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1355]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2190]] +// SIMD-ONLY0: if.end2190: +// SIMD-ONLY0-NEXT: [[TMP1356:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2191:%.*]] = zext i16 [[TMP1356]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1357:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2192:%.*]] = zext i16 [[TMP1357]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2193:%.*]] = icmp sgt i32 [[CONV2191]], [[CONV2192]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2193]], label [[IF_THEN2195:%.*]], label [[IF_END2196:%.*]] +// SIMD-ONLY0: if.then2195: +// SIMD-ONLY0-NEXT: [[TMP1358:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1358]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2196]] +// SIMD-ONLY0: if.end2196: +// SIMD-ONLY0-NEXT: [[TMP1359:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1359]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1360:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2197:%.*]] = zext i16 [[TMP1360]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1361:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2198:%.*]] = zext i16 [[TMP1361]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2199:%.*]] = icmp sgt i32 [[CONV2197]], [[CONV2198]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2199]], label [[IF_THEN2201:%.*]], label [[IF_END2202:%.*]] +// SIMD-ONLY0: if.then2201: +// SIMD-ONLY0-NEXT: [[TMP1362:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1362]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2202]] +// SIMD-ONLY0: if.end2202: +// SIMD-ONLY0-NEXT: [[TMP1363:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1363]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1364:%.*]] 
= load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2203:%.*]] = zext i16 [[TMP1364]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1365:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2204:%.*]] = zext i16 [[TMP1365]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2205:%.*]] = icmp slt i32 [[CONV2203]], [[CONV2204]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2205]], label [[IF_THEN2207:%.*]], label [[IF_END2208:%.*]] +// SIMD-ONLY0: if.then2207: +// SIMD-ONLY0-NEXT: [[TMP1366:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1366]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2208]] +// SIMD-ONLY0: if.end2208: +// SIMD-ONLY0-NEXT: [[TMP1367:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1367]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1368:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2209:%.*]] = zext i16 [[TMP1368]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1369:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2210:%.*]] = zext i16 [[TMP1369]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2211:%.*]] = icmp slt i32 [[CONV2209]], [[CONV2210]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2211]], label [[IF_THEN2213:%.*]], label [[IF_END2214:%.*]] +// SIMD-ONLY0: if.then2213: +// SIMD-ONLY0-NEXT: [[TMP1370:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1370]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2214]] +// SIMD-ONLY0: if.end2214: +// SIMD-ONLY0-NEXT: [[TMP1371:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1371]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1372:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2215:%.*]] = zext i16 [[TMP1372]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1373:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2216:%.*]] = zext i16 [[TMP1373]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2217:%.*]] = icmp eq i32 [[CONV2215]], [[CONV2216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2217]], 
label [[IF_THEN2219:%.*]], label [[IF_END2220:%.*]] +// SIMD-ONLY0: if.then2219: +// SIMD-ONLY0-NEXT: [[TMP1374:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1374]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2220]] +// SIMD-ONLY0: if.end2220: +// SIMD-ONLY0-NEXT: [[TMP1375:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1375]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1376:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2221:%.*]] = zext i16 [[TMP1376]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1377:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2222:%.*]] = zext i16 [[TMP1377]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2223:%.*]] = icmp eq i32 [[CONV2221]], [[CONV2222]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2223]], label [[IF_THEN2225:%.*]], label [[IF_END2226:%.*]] +// SIMD-ONLY0: if.then2225: +// SIMD-ONLY0-NEXT: [[TMP1378:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1378]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2226]] +// SIMD-ONLY0: if.end2226: +// SIMD-ONLY0-NEXT: [[TMP1379:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1379]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1380:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2227:%.*]] = zext i16 [[TMP1380]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1381:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2228:%.*]] = zext i16 [[TMP1381]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2229:%.*]] = icmp eq i32 [[CONV2227]], [[CONV2228]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2229]], label [[IF_THEN2231:%.*]], label [[IF_ELSE2232:%.*]] +// SIMD-ONLY0: if.then2231: +// SIMD-ONLY0-NEXT: [[TMP1382:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1382]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2233:%.*]] +// SIMD-ONLY0: if.else2232: +// SIMD-ONLY0-NEXT: [[TMP1383:%.*]] = load i16, ptr [[USX]], align 
2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1383]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2233]] +// SIMD-ONLY0: if.end2233: +// SIMD-ONLY0-NEXT: [[TMP1384:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2234:%.*]] = zext i16 [[TMP1384]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1385:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2235:%.*]] = zext i16 [[TMP1385]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2236:%.*]] = icmp eq i32 [[CONV2234]], [[CONV2235]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2236]], label [[IF_THEN2238:%.*]], label [[IF_ELSE2239:%.*]] +// SIMD-ONLY0: if.then2238: +// SIMD-ONLY0-NEXT: [[TMP1386:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1386]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2240:%.*]] +// SIMD-ONLY0: if.else2239: +// SIMD-ONLY0-NEXT: [[TMP1387:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1387]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2240]] +// SIMD-ONLY0: if.end2240: +// SIMD-ONLY0-NEXT: [[TMP1388:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2241:%.*]] = zext i16 [[TMP1388]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1389:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2242:%.*]] = zext i16 [[TMP1389]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2243:%.*]] = icmp eq i32 [[CONV2241]], [[CONV2242]] +// SIMD-ONLY0-NEXT: [[CONV2244:%.*]] = zext i1 [[CMP2243]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2245:%.*]] = trunc i32 [[CONV2244]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2245]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1390:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2246:%.*]] = icmp ne i16 [[TMP1390]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2246]], label [[IF_THEN2247:%.*]], label [[IF_END2248:%.*]] +// SIMD-ONLY0: if.then2247: +// SIMD-ONLY0-NEXT: [[TMP1391:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1391]], ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: br label [[IF_END2248]] +// SIMD-ONLY0: if.end2248: +// SIMD-ONLY0-NEXT: [[TMP1392:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2249:%.*]] = zext i16 [[TMP1392]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1393:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2250:%.*]] = zext i16 [[TMP1393]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2251:%.*]] = icmp eq i32 [[CONV2249]], [[CONV2250]] +// SIMD-ONLY0-NEXT: [[CONV2252:%.*]] = zext i1 [[CMP2251]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2253:%.*]] = trunc i32 [[CONV2252]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2253]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1394:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2254:%.*]] = icmp ne i16 [[TMP1394]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2254]], label [[IF_THEN2255:%.*]], label [[IF_END2256:%.*]] +// SIMD-ONLY0: if.then2255: +// SIMD-ONLY0-NEXT: [[TMP1395:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1395]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2256]] +// SIMD-ONLY0: if.end2256: +// SIMD-ONLY0-NEXT: [[TMP1396:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2257:%.*]] = zext i16 [[TMP1396]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1397:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2258:%.*]] = zext i16 [[TMP1397]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2259:%.*]] = icmp eq i32 [[CONV2257]], [[CONV2258]] +// SIMD-ONLY0-NEXT: [[CONV2260:%.*]] = zext i1 [[CMP2259]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2261:%.*]] = trunc i32 [[CONV2260]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2261]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1398:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2262:%.*]] = icmp ne i16 [[TMP1398]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2262]], label [[IF_THEN2263:%.*]], label [[IF_ELSE2264:%.*]] +// SIMD-ONLY0: if.then2263: +// SIMD-ONLY0-NEXT: [[TMP1399:%.*]] = load i16, ptr [[USD]], align 2 +// 
SIMD-ONLY0-NEXT: store i16 [[TMP1399]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2265:%.*]] +// SIMD-ONLY0: if.else2264: +// SIMD-ONLY0-NEXT: [[TMP1400:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1400]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2265]] +// SIMD-ONLY0: if.end2265: +// SIMD-ONLY0-NEXT: [[TMP1401:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2266:%.*]] = zext i16 [[TMP1401]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1402:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2267:%.*]] = zext i16 [[TMP1402]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2268:%.*]] = icmp eq i32 [[CONV2266]], [[CONV2267]] +// SIMD-ONLY0-NEXT: [[CONV2269:%.*]] = zext i1 [[CMP2268]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2270:%.*]] = trunc i32 [[CONV2269]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2270]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1403:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2271:%.*]] = icmp ne i16 [[TMP1403]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2271]], label [[IF_THEN2272:%.*]], label [[IF_ELSE2273:%.*]] +// SIMD-ONLY0: if.then2272: +// SIMD-ONLY0-NEXT: [[TMP1404:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1404]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2274:%.*]] +// SIMD-ONLY0: if.else2273: +// SIMD-ONLY0-NEXT: [[TMP1405:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1405]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2274]] +// SIMD-ONLY0: if.end2274: +// SIMD-ONLY0-NEXT: [[TMP1406:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1406]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1407:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2275:%.*]] = zext i16 [[TMP1407]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1408:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2276:%.*]] = zext i16 [[TMP1408]] to i32 
+// SIMD-ONLY0-NEXT: [[CMP2277:%.*]] = icmp sgt i32 [[CONV2275]], [[CONV2276]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2277]], label [[IF_THEN2279:%.*]], label [[IF_END2280:%.*]] +// SIMD-ONLY0: if.then2279: +// SIMD-ONLY0-NEXT: [[TMP1409:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1409]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2280]] +// SIMD-ONLY0: if.end2280: +// SIMD-ONLY0-NEXT: [[TMP1410:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1410]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1411:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2281:%.*]] = zext i16 [[TMP1411]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1412:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2282:%.*]] = zext i16 [[TMP1412]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2283:%.*]] = icmp sgt i32 [[CONV2281]], [[CONV2282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2283]], label [[IF_THEN2285:%.*]], label [[IF_END2286:%.*]] +// SIMD-ONLY0: if.then2285: +// SIMD-ONLY0-NEXT: [[TMP1413:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1413]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2286]] +// SIMD-ONLY0: if.end2286: +// SIMD-ONLY0-NEXT: [[TMP1414:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1414]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1415:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2287:%.*]] = zext i16 [[TMP1415]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1416:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2288:%.*]] = zext i16 [[TMP1416]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2289:%.*]] = icmp slt i32 [[CONV2287]], [[CONV2288]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2289]], label [[IF_THEN2291:%.*]], label [[IF_END2292:%.*]] +// SIMD-ONLY0: if.then2291: +// SIMD-ONLY0-NEXT: [[TMP1417:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1417]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br 
label [[IF_END2292]] +// SIMD-ONLY0: if.end2292: +// SIMD-ONLY0-NEXT: [[TMP1418:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1418]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1419:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2293:%.*]] = zext i16 [[TMP1419]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1420:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2294:%.*]] = zext i16 [[TMP1420]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2295:%.*]] = icmp slt i32 [[CONV2293]], [[CONV2294]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2295]], label [[IF_THEN2297:%.*]], label [[IF_END2298:%.*]] +// SIMD-ONLY0: if.then2297: +// SIMD-ONLY0-NEXT: [[TMP1421:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1421]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2298]] +// SIMD-ONLY0: if.end2298: +// SIMD-ONLY0-NEXT: [[TMP1422:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1422]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1423:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2299:%.*]] = zext i16 [[TMP1423]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1424:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2300:%.*]] = zext i16 [[TMP1424]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2301:%.*]] = icmp eq i32 [[CONV2299]], [[CONV2300]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2301]], label [[IF_THEN2303:%.*]], label [[IF_END2304:%.*]] +// SIMD-ONLY0: if.then2303: +// SIMD-ONLY0-NEXT: [[TMP1425:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1425]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2304]] +// SIMD-ONLY0: if.end2304: +// SIMD-ONLY0-NEXT: [[TMP1426:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1426]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1427:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2305:%.*]] = zext i16 [[TMP1427]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1428:%.*]] = 
load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2306:%.*]] = zext i16 [[TMP1428]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2307:%.*]] = icmp eq i32 [[CONV2305]], [[CONV2306]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2307]], label [[IF_THEN2309:%.*]], label [[IF_END2310:%.*]] +// SIMD-ONLY0: if.then2309: +// SIMD-ONLY0-NEXT: [[TMP1429:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1429]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2310]] +// SIMD-ONLY0: if.end2310: +// SIMD-ONLY0-NEXT: [[TMP1430:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2311:%.*]] = zext i16 [[TMP1430]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1431:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2312:%.*]] = zext i16 [[TMP1431]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2313:%.*]] = icmp sgt i32 [[CONV2311]], [[CONV2312]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2313]], label [[IF_THEN2315:%.*]], label [[IF_END2316:%.*]] +// SIMD-ONLY0: if.then2315: +// SIMD-ONLY0-NEXT: [[TMP1432:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1432]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2316]] +// SIMD-ONLY0: if.end2316: +// SIMD-ONLY0-NEXT: [[TMP1433:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1433]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1434:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2317:%.*]] = zext i16 [[TMP1434]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1435:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2318:%.*]] = zext i16 [[TMP1435]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2319:%.*]] = icmp sgt i32 [[CONV2317]], [[CONV2318]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2319]], label [[IF_THEN2321:%.*]], label [[IF_END2322:%.*]] +// SIMD-ONLY0: if.then2321: +// SIMD-ONLY0-NEXT: [[TMP1436:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1436]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2322]] +// SIMD-ONLY0: 
if.end2322: +// SIMD-ONLY0-NEXT: [[TMP1437:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1437]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1438:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2323:%.*]] = zext i16 [[TMP1438]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1439:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2324:%.*]] = zext i16 [[TMP1439]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2325:%.*]] = icmp slt i32 [[CONV2323]], [[CONV2324]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2325]], label [[IF_THEN2327:%.*]], label [[IF_END2328:%.*]] +// SIMD-ONLY0: if.then2327: +// SIMD-ONLY0-NEXT: [[TMP1440:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1440]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2328]] +// SIMD-ONLY0: if.end2328: +// SIMD-ONLY0-NEXT: [[TMP1441:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1441]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1442:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2329:%.*]] = zext i16 [[TMP1442]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1443:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2330:%.*]] = zext i16 [[TMP1443]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2331:%.*]] = icmp slt i32 [[CONV2329]], [[CONV2330]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2331]], label [[IF_THEN2333:%.*]], label [[IF_END2334:%.*]] +// SIMD-ONLY0: if.then2333: +// SIMD-ONLY0-NEXT: [[TMP1444:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1444]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2334]] +// SIMD-ONLY0: if.end2334: +// SIMD-ONLY0-NEXT: [[TMP1445:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1445]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1446:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2335:%.*]] = zext i16 [[TMP1446]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1447:%.*]] = load i16, ptr [[USE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV2336:%.*]] = zext i16 [[TMP1447]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2337:%.*]] = icmp eq i32 [[CONV2335]], [[CONV2336]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2337]], label [[IF_THEN2339:%.*]], label [[IF_END2340:%.*]] +// SIMD-ONLY0: if.then2339: +// SIMD-ONLY0-NEXT: [[TMP1448:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1448]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2340]] +// SIMD-ONLY0: if.end2340: +// SIMD-ONLY0-NEXT: [[TMP1449:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1449]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1450:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2341:%.*]] = zext i16 [[TMP1450]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1451:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2342:%.*]] = zext i16 [[TMP1451]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2343:%.*]] = icmp eq i32 [[CONV2341]], [[CONV2342]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2343]], label [[IF_THEN2345:%.*]], label [[IF_END2346:%.*]] +// SIMD-ONLY0: if.then2345: +// SIMD-ONLY0-NEXT: [[TMP1452:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1452]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2346]] +// SIMD-ONLY0: if.end2346: +// SIMD-ONLY0-NEXT: [[TMP1453:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1453]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1454:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2347:%.*]] = zext i16 [[TMP1454]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1455:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2348:%.*]] = zext i16 [[TMP1455]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2349:%.*]] = icmp eq i32 [[CONV2347]], [[CONV2348]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2349]], label [[IF_THEN2351:%.*]], label [[IF_ELSE2352:%.*]] +// SIMD-ONLY0: if.then2351: +// SIMD-ONLY0-NEXT: [[TMP1456:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store 
i16 [[TMP1456]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2353:%.*]] +// SIMD-ONLY0: if.else2352: +// SIMD-ONLY0-NEXT: [[TMP1457:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1457]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2353]] +// SIMD-ONLY0: if.end2353: +// SIMD-ONLY0-NEXT: [[TMP1458:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2354:%.*]] = zext i16 [[TMP1458]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1459:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2355:%.*]] = zext i16 [[TMP1459]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2356:%.*]] = icmp eq i32 [[CONV2354]], [[CONV2355]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2356]], label [[IF_THEN2358:%.*]], label [[IF_ELSE2359:%.*]] +// SIMD-ONLY0: if.then2358: +// SIMD-ONLY0-NEXT: [[TMP1460:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1460]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2360:%.*]] +// SIMD-ONLY0: if.else2359: +// SIMD-ONLY0-NEXT: [[TMP1461:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1461]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2360]] +// SIMD-ONLY0: if.end2360: +// SIMD-ONLY0-NEXT: [[TMP1462:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2361:%.*]] = zext i16 [[TMP1462]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1463:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2362:%.*]] = zext i16 [[TMP1463]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2363:%.*]] = icmp eq i32 [[CONV2361]], [[CONV2362]] +// SIMD-ONLY0-NEXT: [[CONV2364:%.*]] = zext i1 [[CMP2363]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2365:%.*]] = trunc i32 [[CONV2364]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2365]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1464:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2366:%.*]] = icmp ne i16 [[TMP1464]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2366]], label [[IF_THEN2367:%.*]], label 
[[IF_END2368:%.*]] +// SIMD-ONLY0: if.then2367: +// SIMD-ONLY0-NEXT: [[TMP1465:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1465]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2368]] +// SIMD-ONLY0: if.end2368: +// SIMD-ONLY0-NEXT: [[TMP1466:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2369:%.*]] = zext i16 [[TMP1466]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1467:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2370:%.*]] = zext i16 [[TMP1467]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2371:%.*]] = icmp eq i32 [[CONV2369]], [[CONV2370]] +// SIMD-ONLY0-NEXT: [[CONV2372:%.*]] = zext i1 [[CMP2371]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2373:%.*]] = trunc i32 [[CONV2372]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2373]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1468:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2374:%.*]] = icmp ne i16 [[TMP1468]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2374]], label [[IF_THEN2375:%.*]], label [[IF_END2376:%.*]] +// SIMD-ONLY0: if.then2375: +// SIMD-ONLY0-NEXT: [[TMP1469:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1469]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2376]] +// SIMD-ONLY0: if.end2376: +// SIMD-ONLY0-NEXT: [[TMP1470:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2377:%.*]] = zext i16 [[TMP1470]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1471:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2378:%.*]] = zext i16 [[TMP1471]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2379:%.*]] = icmp eq i32 [[CONV2377]], [[CONV2378]] +// SIMD-ONLY0-NEXT: [[CONV2380:%.*]] = zext i1 [[CMP2379]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2381:%.*]] = trunc i32 [[CONV2380]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2381]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1472:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2382:%.*]] = icmp ne i16 [[TMP1472]], 0 +// 
SIMD-ONLY0-NEXT: br i1 [[TOBOOL2382]], label [[IF_THEN2383:%.*]], label [[IF_ELSE2384:%.*]] +// SIMD-ONLY0: if.then2383: +// SIMD-ONLY0-NEXT: [[TMP1473:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1473]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2385:%.*]] +// SIMD-ONLY0: if.else2384: +// SIMD-ONLY0-NEXT: [[TMP1474:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1474]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2385]] +// SIMD-ONLY0: if.end2385: +// SIMD-ONLY0-NEXT: [[TMP1475:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2386:%.*]] = zext i16 [[TMP1475]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1476:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2387:%.*]] = zext i16 [[TMP1476]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2388:%.*]] = icmp eq i32 [[CONV2386]], [[CONV2387]] +// SIMD-ONLY0-NEXT: [[CONV2389:%.*]] = zext i1 [[CMP2388]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2390:%.*]] = trunc i32 [[CONV2389]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2390]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1477:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2391:%.*]] = icmp ne i16 [[TMP1477]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2391]], label [[IF_THEN2392:%.*]], label [[IF_ELSE2393:%.*]] +// SIMD-ONLY0: if.then2392: +// SIMD-ONLY0-NEXT: [[TMP1478:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1478]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2394:%.*]] +// SIMD-ONLY0: if.else2393: +// SIMD-ONLY0-NEXT: [[TMP1479:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1479]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2394]] +// SIMD-ONLY0: if.end2394: +// SIMD-ONLY0-NEXT: [[TMP1480:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1480]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1481:%.*]] = load i16, ptr [[USE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV2395:%.*]] = zext i16 [[TMP1481]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1482:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2396:%.*]] = zext i16 [[TMP1482]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2397:%.*]] = icmp sgt i32 [[CONV2395]], [[CONV2396]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2397]], label [[IF_THEN2399:%.*]], label [[IF_END2400:%.*]] +// SIMD-ONLY0: if.then2399: +// SIMD-ONLY0-NEXT: [[TMP1483:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1483]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2400]] +// SIMD-ONLY0: if.end2400: +// SIMD-ONLY0-NEXT: [[TMP1484:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1484]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1485:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2401:%.*]] = zext i16 [[TMP1485]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1486:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2402:%.*]] = zext i16 [[TMP1486]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2403:%.*]] = icmp sgt i32 [[CONV2401]], [[CONV2402]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2403]], label [[IF_THEN2405:%.*]], label [[IF_END2406:%.*]] +// SIMD-ONLY0: if.then2405: +// SIMD-ONLY0-NEXT: [[TMP1487:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1487]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2406]] +// SIMD-ONLY0: if.end2406: +// SIMD-ONLY0-NEXT: [[TMP1488:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1488]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1489:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2407:%.*]] = zext i16 [[TMP1489]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1490:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2408:%.*]] = zext i16 [[TMP1490]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2409:%.*]] = icmp slt i32 [[CONV2407]], [[CONV2408]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2409]], label [[IF_THEN2411:%.*]], label 
[[IF_END2412:%.*]] +// SIMD-ONLY0: if.then2411: +// SIMD-ONLY0-NEXT: [[TMP1491:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1491]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2412]] +// SIMD-ONLY0: if.end2412: +// SIMD-ONLY0-NEXT: [[TMP1492:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1492]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1493:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2413:%.*]] = zext i16 [[TMP1493]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1494:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2414:%.*]] = zext i16 [[TMP1494]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2415:%.*]] = icmp slt i32 [[CONV2413]], [[CONV2414]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2415]], label [[IF_THEN2417:%.*]], label [[IF_END2418:%.*]] +// SIMD-ONLY0: if.then2417: +// SIMD-ONLY0-NEXT: [[TMP1495:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1495]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2418]] +// SIMD-ONLY0: if.end2418: +// SIMD-ONLY0-NEXT: [[TMP1496:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1496]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1497:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2419:%.*]] = zext i16 [[TMP1497]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1498:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2420:%.*]] = zext i16 [[TMP1498]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2421:%.*]] = icmp eq i32 [[CONV2419]], [[CONV2420]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2421]], label [[IF_THEN2423:%.*]], label [[IF_END2424:%.*]] +// SIMD-ONLY0: if.then2423: +// SIMD-ONLY0-NEXT: [[TMP1499:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1499]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2424]] +// SIMD-ONLY0: if.end2424: +// SIMD-ONLY0-NEXT: [[TMP1500:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP1500]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1501:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2425:%.*]] = zext i16 [[TMP1501]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1502:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2426:%.*]] = zext i16 [[TMP1502]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2427:%.*]] = icmp eq i32 [[CONV2425]], [[CONV2426]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2427]], label [[IF_THEN2429:%.*]], label [[IF_END2430:%.*]] +// SIMD-ONLY0: if.then2429: +// SIMD-ONLY0-NEXT: [[TMP1503:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1503]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2430]] +// SIMD-ONLY0: if.end2430: +// SIMD-ONLY0-NEXT: [[TMP1504:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2431:%.*]] = zext i16 [[TMP1504]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1505:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2432:%.*]] = zext i16 [[TMP1505]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2433:%.*]] = icmp sgt i32 [[CONV2431]], [[CONV2432]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2433]], label [[IF_THEN2435:%.*]], label [[IF_END2436:%.*]] +// SIMD-ONLY0: if.then2435: +// SIMD-ONLY0-NEXT: [[TMP1506:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1506]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2436]] +// SIMD-ONLY0: if.end2436: +// SIMD-ONLY0-NEXT: [[TMP1507:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1507]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1508:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2437:%.*]] = zext i16 [[TMP1508]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1509:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2438:%.*]] = zext i16 [[TMP1509]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2439:%.*]] = icmp sgt i32 [[CONV2437]], [[CONV2438]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2439]], label [[IF_THEN2441:%.*]], label [[IF_END2442:%.*]] +// SIMD-ONLY0: 
if.then2441: +// SIMD-ONLY0-NEXT: [[TMP1510:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1510]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2442]] +// SIMD-ONLY0: if.end2442: +// SIMD-ONLY0-NEXT: [[TMP1511:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1511]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1512:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2443:%.*]] = zext i16 [[TMP1512]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1513:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2444:%.*]] = zext i16 [[TMP1513]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2445:%.*]] = icmp slt i32 [[CONV2443]], [[CONV2444]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2445]], label [[IF_THEN2447:%.*]], label [[IF_END2448:%.*]] +// SIMD-ONLY0: if.then2447: +// SIMD-ONLY0-NEXT: [[TMP1514:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1514]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2448]] +// SIMD-ONLY0: if.end2448: +// SIMD-ONLY0-NEXT: [[TMP1515:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1515]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1516:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2449:%.*]] = zext i16 [[TMP1516]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1517:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2450:%.*]] = zext i16 [[TMP1517]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2451:%.*]] = icmp slt i32 [[CONV2449]], [[CONV2450]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2451]], label [[IF_THEN2453:%.*]], label [[IF_END2454:%.*]] +// SIMD-ONLY0: if.then2453: +// SIMD-ONLY0-NEXT: [[TMP1518:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1518]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2454]] +// SIMD-ONLY0: if.end2454: +// SIMD-ONLY0-NEXT: [[TMP1519:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1519]], ptr [[USV]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP1520:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2455:%.*]] = zext i16 [[TMP1520]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1521:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2456:%.*]] = zext i16 [[TMP1521]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2457:%.*]] = icmp eq i32 [[CONV2455]], [[CONV2456]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2457]], label [[IF_THEN2459:%.*]], label [[IF_END2460:%.*]] +// SIMD-ONLY0: if.then2459: +// SIMD-ONLY0-NEXT: [[TMP1522:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1522]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2460]] +// SIMD-ONLY0: if.end2460: +// SIMD-ONLY0-NEXT: [[TMP1523:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1523]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1524:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2461:%.*]] = zext i16 [[TMP1524]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1525:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2462:%.*]] = zext i16 [[TMP1525]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2463:%.*]] = icmp eq i32 [[CONV2461]], [[CONV2462]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2463]], label [[IF_THEN2465:%.*]], label [[IF_END2466:%.*]] +// SIMD-ONLY0: if.then2465: +// SIMD-ONLY0-NEXT: [[TMP1526:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1526]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2466]] +// SIMD-ONLY0: if.end2466: +// SIMD-ONLY0-NEXT: [[TMP1527:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1527]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1528:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2467:%.*]] = zext i16 [[TMP1528]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1529:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2468:%.*]] = zext i16 [[TMP1529]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2469:%.*]] = icmp eq i32 [[CONV2467]], [[CONV2468]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP2469]], label [[IF_THEN2471:%.*]], label [[IF_ELSE2472:%.*]] +// SIMD-ONLY0: if.then2471: +// SIMD-ONLY0-NEXT: [[TMP1530:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1530]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2473:%.*]] +// SIMD-ONLY0: if.else2472: +// SIMD-ONLY0-NEXT: [[TMP1531:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1531]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2473]] +// SIMD-ONLY0: if.end2473: +// SIMD-ONLY0-NEXT: [[TMP1532:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2474:%.*]] = zext i16 [[TMP1532]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1533:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2475:%.*]] = zext i16 [[TMP1533]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2476:%.*]] = icmp eq i32 [[CONV2474]], [[CONV2475]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2476]], label [[IF_THEN2478:%.*]], label [[IF_ELSE2479:%.*]] +// SIMD-ONLY0: if.then2478: +// SIMD-ONLY0-NEXT: [[TMP1534:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1534]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2480:%.*]] +// SIMD-ONLY0: if.else2479: +// SIMD-ONLY0-NEXT: [[TMP1535:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1535]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2480]] +// SIMD-ONLY0: if.end2480: +// SIMD-ONLY0-NEXT: [[TMP1536:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2481:%.*]] = zext i16 [[TMP1536]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1537:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2482:%.*]] = zext i16 [[TMP1537]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2483:%.*]] = icmp eq i32 [[CONV2481]], [[CONV2482]] +// SIMD-ONLY0-NEXT: [[CONV2484:%.*]] = zext i1 [[CMP2483]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2485:%.*]] = trunc i32 [[CONV2484]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2485]], ptr [[USR]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP1538:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2486:%.*]] = icmp ne i16 [[TMP1538]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2486]], label [[IF_THEN2487:%.*]], label [[IF_END2488:%.*]] +// SIMD-ONLY0: if.then2487: +// SIMD-ONLY0-NEXT: [[TMP1539:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1539]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2488]] +// SIMD-ONLY0: if.end2488: +// SIMD-ONLY0-NEXT: [[TMP1540:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2489:%.*]] = zext i16 [[TMP1540]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1541:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2490:%.*]] = zext i16 [[TMP1541]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2491:%.*]] = icmp eq i32 [[CONV2489]], [[CONV2490]] +// SIMD-ONLY0-NEXT: [[CONV2492:%.*]] = zext i1 [[CMP2491]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2493:%.*]] = trunc i32 [[CONV2492]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2493]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1542:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2494:%.*]] = icmp ne i16 [[TMP1542]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2494]], label [[IF_THEN2495:%.*]], label [[IF_END2496:%.*]] +// SIMD-ONLY0: if.then2495: +// SIMD-ONLY0-NEXT: [[TMP1543:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1543]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2496]] +// SIMD-ONLY0: if.end2496: +// SIMD-ONLY0-NEXT: [[TMP1544:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2497:%.*]] = zext i16 [[TMP1544]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1545:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2498:%.*]] = zext i16 [[TMP1545]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2499:%.*]] = icmp eq i32 [[CONV2497]], [[CONV2498]] +// SIMD-ONLY0-NEXT: [[CONV2500:%.*]] = zext i1 [[CMP2499]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2501:%.*]] = trunc i32 [[CONV2500]] to i16 +// 
SIMD-ONLY0-NEXT: store i16 [[CONV2501]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1546:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2502:%.*]] = icmp ne i16 [[TMP1546]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2502]], label [[IF_THEN2503:%.*]], label [[IF_ELSE2504:%.*]] +// SIMD-ONLY0: if.then2503: +// SIMD-ONLY0-NEXT: [[TMP1547:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1547]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2505:%.*]] +// SIMD-ONLY0: if.else2504: +// SIMD-ONLY0-NEXT: [[TMP1548:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1548]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2505]] +// SIMD-ONLY0: if.end2505: +// SIMD-ONLY0-NEXT: [[TMP1549:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2506:%.*]] = zext i16 [[TMP1549]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1550:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2507:%.*]] = zext i16 [[TMP1550]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2508:%.*]] = icmp eq i32 [[CONV2506]], [[CONV2507]] +// SIMD-ONLY0-NEXT: [[CONV2509:%.*]] = zext i1 [[CMP2508]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2510:%.*]] = trunc i32 [[CONV2509]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2510]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1551:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2511:%.*]] = icmp ne i16 [[TMP1551]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2511]], label [[IF_THEN2512:%.*]], label [[IF_ELSE2513:%.*]] +// SIMD-ONLY0: if.then2512: +// SIMD-ONLY0-NEXT: [[TMP1552:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1552]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2514:%.*]] +// SIMD-ONLY0: if.else2513: +// SIMD-ONLY0-NEXT: [[TMP1553:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1553]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2514]] +// SIMD-ONLY0: if.end2514: +// 
SIMD-ONLY0-NEXT: [[TMP1554:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1554]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1555:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2515:%.*]] = zext i16 [[TMP1555]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1556:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2516:%.*]] = zext i16 [[TMP1556]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2517:%.*]] = icmp sgt i32 [[CONV2515]], [[CONV2516]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2517]], label [[IF_THEN2519:%.*]], label [[IF_END2520:%.*]] +// SIMD-ONLY0: if.then2519: +// SIMD-ONLY0-NEXT: [[TMP1557:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1557]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2520]] +// SIMD-ONLY0: if.end2520: +// SIMD-ONLY0-NEXT: [[TMP1558:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1558]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1559:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2521:%.*]] = zext i16 [[TMP1559]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1560:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2522:%.*]] = zext i16 [[TMP1560]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2523:%.*]] = icmp sgt i32 [[CONV2521]], [[CONV2522]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2523]], label [[IF_THEN2525:%.*]], label [[IF_END2526:%.*]] +// SIMD-ONLY0: if.then2525: +// SIMD-ONLY0-NEXT: [[TMP1561:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1561]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2526]] +// SIMD-ONLY0: if.end2526: +// SIMD-ONLY0-NEXT: [[TMP1562:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1562]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1563:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2527:%.*]] = zext i16 [[TMP1563]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1564:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV2528:%.*]] = zext i16 [[TMP1564]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2529:%.*]] = icmp slt i32 [[CONV2527]], [[CONV2528]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2529]], label [[IF_THEN2531:%.*]], label [[IF_END2532:%.*]] +// SIMD-ONLY0: if.then2531: +// SIMD-ONLY0-NEXT: [[TMP1565:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1565]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2532]] +// SIMD-ONLY0: if.end2532: +// SIMD-ONLY0-NEXT: [[TMP1566:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1566]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1567:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2533:%.*]] = zext i16 [[TMP1567]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1568:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2534:%.*]] = zext i16 [[TMP1568]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2535:%.*]] = icmp slt i32 [[CONV2533]], [[CONV2534]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2535]], label [[IF_THEN2537:%.*]], label [[IF_END2538:%.*]] +// SIMD-ONLY0: if.then2537: +// SIMD-ONLY0-NEXT: [[TMP1569:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1569]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2538]] +// SIMD-ONLY0: if.end2538: +// SIMD-ONLY0-NEXT: [[TMP1570:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1570]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1571:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2539:%.*]] = zext i16 [[TMP1571]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1572:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2540:%.*]] = zext i16 [[TMP1572]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2541:%.*]] = icmp eq i32 [[CONV2539]], [[CONV2540]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2541]], label [[IF_THEN2543:%.*]], label [[IF_END2544:%.*]] +// SIMD-ONLY0: if.then2543: +// SIMD-ONLY0-NEXT: [[TMP1573:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1573]], 
ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2544]] +// SIMD-ONLY0: if.end2544: +// SIMD-ONLY0-NEXT: [[TMP1574:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1574]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1575:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2545:%.*]] = zext i16 [[TMP1575]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1576:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2546:%.*]] = zext i16 [[TMP1576]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2547:%.*]] = icmp eq i32 [[CONV2545]], [[CONV2546]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2547]], label [[IF_THEN2549:%.*]], label [[IF_END2550:%.*]] +// SIMD-ONLY0: if.then2549: +// SIMD-ONLY0-NEXT: [[TMP1577:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1577]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2550]] +// SIMD-ONLY0: if.end2550: +// SIMD-ONLY0-NEXT: [[TMP1578:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2551:%.*]] = zext i16 [[TMP1578]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1579:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2552:%.*]] = zext i16 [[TMP1579]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2553:%.*]] = icmp sgt i32 [[CONV2551]], [[CONV2552]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2553]], label [[IF_THEN2555:%.*]], label [[IF_END2556:%.*]] +// SIMD-ONLY0: if.then2555: +// SIMD-ONLY0-NEXT: [[TMP1580:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1580]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2556]] +// SIMD-ONLY0: if.end2556: +// SIMD-ONLY0-NEXT: [[TMP1581:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1581]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1582:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2557:%.*]] = zext i16 [[TMP1582]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1583:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2558:%.*]] = zext i16 [[TMP1583]] 
to i32 +// SIMD-ONLY0-NEXT: [[CMP2559:%.*]] = icmp sgt i32 [[CONV2557]], [[CONV2558]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2559]], label [[IF_THEN2561:%.*]], label [[IF_END2562:%.*]] +// SIMD-ONLY0: if.then2561: +// SIMD-ONLY0-NEXT: [[TMP1584:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1584]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2562]] +// SIMD-ONLY0: if.end2562: +// SIMD-ONLY0-NEXT: [[TMP1585:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1585]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1586:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2563:%.*]] = zext i16 [[TMP1586]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1587:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2564:%.*]] = zext i16 [[TMP1587]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2565:%.*]] = icmp slt i32 [[CONV2563]], [[CONV2564]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2565]], label [[IF_THEN2567:%.*]], label [[IF_END2568:%.*]] +// SIMD-ONLY0: if.then2567: +// SIMD-ONLY0-NEXT: [[TMP1588:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1588]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2568]] +// SIMD-ONLY0: if.end2568: +// SIMD-ONLY0-NEXT: [[TMP1589:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1589]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1590:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2569:%.*]] = zext i16 [[TMP1590]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1591:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2570:%.*]] = zext i16 [[TMP1591]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2571:%.*]] = icmp slt i32 [[CONV2569]], [[CONV2570]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2571]], label [[IF_THEN2573:%.*]], label [[IF_END2574:%.*]] +// SIMD-ONLY0: if.then2573: +// SIMD-ONLY0-NEXT: [[TMP1592:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1592]], ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: br label [[IF_END2574]] +// SIMD-ONLY0: if.end2574: +// SIMD-ONLY0-NEXT: [[TMP1593:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1593]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1594:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2575:%.*]] = zext i16 [[TMP1594]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1595:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2576:%.*]] = zext i16 [[TMP1595]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2577:%.*]] = icmp eq i32 [[CONV2575]], [[CONV2576]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2577]], label [[IF_THEN2579:%.*]], label [[IF_END2580:%.*]] +// SIMD-ONLY0: if.then2579: +// SIMD-ONLY0-NEXT: [[TMP1596:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1596]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2580]] +// SIMD-ONLY0: if.end2580: +// SIMD-ONLY0-NEXT: [[TMP1597:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1597]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1598:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2581:%.*]] = zext i16 [[TMP1598]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1599:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2582:%.*]] = zext i16 [[TMP1599]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2583:%.*]] = icmp eq i32 [[CONV2581]], [[CONV2582]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2583]], label [[IF_THEN2585:%.*]], label [[IF_END2586:%.*]] +// SIMD-ONLY0: if.then2585: +// SIMD-ONLY0-NEXT: [[TMP1600:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1600]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2586]] +// SIMD-ONLY0: if.end2586: +// SIMD-ONLY0-NEXT: [[TMP1601:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1601]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1602:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2587:%.*]] = zext i16 [[TMP1602]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP1603:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2588:%.*]] = zext i16 [[TMP1603]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2589:%.*]] = icmp eq i32 [[CONV2587]], [[CONV2588]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2589]], label [[IF_THEN2591:%.*]], label [[IF_ELSE2592:%.*]] +// SIMD-ONLY0: if.then2591: +// SIMD-ONLY0-NEXT: [[TMP1604:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1604]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2593:%.*]] +// SIMD-ONLY0: if.else2592: +// SIMD-ONLY0-NEXT: [[TMP1605:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1605]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2593]] +// SIMD-ONLY0: if.end2593: +// SIMD-ONLY0-NEXT: [[TMP1606:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2594:%.*]] = zext i16 [[TMP1606]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1607:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2595:%.*]] = zext i16 [[TMP1607]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2596:%.*]] = icmp eq i32 [[CONV2594]], [[CONV2595]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2596]], label [[IF_THEN2598:%.*]], label [[IF_ELSE2599:%.*]] +// SIMD-ONLY0: if.then2598: +// SIMD-ONLY0-NEXT: [[TMP1608:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1608]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2600:%.*]] +// SIMD-ONLY0: if.else2599: +// SIMD-ONLY0-NEXT: [[TMP1609:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1609]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2600]] +// SIMD-ONLY0: if.end2600: +// SIMD-ONLY0-NEXT: [[TMP1610:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2601:%.*]] = zext i16 [[TMP1610]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1611:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2602:%.*]] = zext i16 [[TMP1611]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2603:%.*]] = icmp eq i32 [[CONV2601]], [[CONV2602]] +// 
SIMD-ONLY0-NEXT: [[CONV2604:%.*]] = zext i1 [[CMP2603]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2605:%.*]] = trunc i32 [[CONV2604]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2605]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1612:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2606:%.*]] = icmp ne i16 [[TMP1612]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2606]], label [[IF_THEN2607:%.*]], label [[IF_END2608:%.*]] +// SIMD-ONLY0: if.then2607: +// SIMD-ONLY0-NEXT: [[TMP1613:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1613]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2608]] +// SIMD-ONLY0: if.end2608: +// SIMD-ONLY0-NEXT: [[TMP1614:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2609:%.*]] = zext i16 [[TMP1614]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1615:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2610:%.*]] = zext i16 [[TMP1615]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2611:%.*]] = icmp eq i32 [[CONV2609]], [[CONV2610]] +// SIMD-ONLY0-NEXT: [[CONV2612:%.*]] = zext i1 [[CMP2611]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2613:%.*]] = trunc i32 [[CONV2612]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2613]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1616:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2614:%.*]] = icmp ne i16 [[TMP1616]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2614]], label [[IF_THEN2615:%.*]], label [[IF_END2616:%.*]] +// SIMD-ONLY0: if.then2615: +// SIMD-ONLY0-NEXT: [[TMP1617:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1617]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2616]] +// SIMD-ONLY0: if.end2616: +// SIMD-ONLY0-NEXT: [[TMP1618:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2617:%.*]] = zext i16 [[TMP1618]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1619:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2618:%.*]] = zext i16 [[TMP1619]] to i32 +// SIMD-ONLY0-NEXT: 
[[CMP2619:%.*]] = icmp eq i32 [[CONV2617]], [[CONV2618]] +// SIMD-ONLY0-NEXT: [[CONV2620:%.*]] = zext i1 [[CMP2619]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2621:%.*]] = trunc i32 [[CONV2620]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2621]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1620:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2622:%.*]] = icmp ne i16 [[TMP1620]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2622]], label [[IF_THEN2623:%.*]], label [[IF_ELSE2624:%.*]] +// SIMD-ONLY0: if.then2623: +// SIMD-ONLY0-NEXT: [[TMP1621:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1621]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2625:%.*]] +// SIMD-ONLY0: if.else2624: +// SIMD-ONLY0-NEXT: [[TMP1622:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1622]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2625]] +// SIMD-ONLY0: if.end2625: +// SIMD-ONLY0-NEXT: [[TMP1623:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2626:%.*]] = zext i16 [[TMP1623]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1624:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2627:%.*]] = zext i16 [[TMP1624]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2628:%.*]] = icmp eq i32 [[CONV2626]], [[CONV2627]] +// SIMD-ONLY0-NEXT: [[CONV2629:%.*]] = zext i1 [[CMP2628]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2630:%.*]] = trunc i32 [[CONV2629]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2630]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1625:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2631:%.*]] = icmp ne i16 [[TMP1625]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2631]], label [[IF_THEN2632:%.*]], label [[IF_ELSE2633:%.*]] +// SIMD-ONLY0: if.then2632: +// SIMD-ONLY0-NEXT: [[TMP1626:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1626]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2634:%.*]] +// SIMD-ONLY0: if.else2633: +// SIMD-ONLY0-NEXT: 
[[TMP1627:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1627]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2634]] +// SIMD-ONLY0: if.end2634: +// SIMD-ONLY0-NEXT: [[TMP1628:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1628]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1629:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2635:%.*]] = zext i16 [[TMP1629]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1630:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2636:%.*]] = zext i16 [[TMP1630]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2637:%.*]] = icmp sgt i32 [[CONV2635]], [[CONV2636]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2637]], label [[IF_THEN2639:%.*]], label [[IF_END2640:%.*]] +// SIMD-ONLY0: if.then2639: +// SIMD-ONLY0-NEXT: [[TMP1631:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1631]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2640]] +// SIMD-ONLY0: if.end2640: +// SIMD-ONLY0-NEXT: [[TMP1632:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1632]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1633:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2641:%.*]] = zext i16 [[TMP1633]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1634:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2642:%.*]] = zext i16 [[TMP1634]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2643:%.*]] = icmp sgt i32 [[CONV2641]], [[CONV2642]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2643]], label [[IF_THEN2645:%.*]], label [[IF_END2646:%.*]] +// SIMD-ONLY0: if.then2645: +// SIMD-ONLY0-NEXT: [[TMP1635:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1635]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2646]] +// SIMD-ONLY0: if.end2646: +// SIMD-ONLY0-NEXT: [[TMP1636:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1636]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1637:%.*]] = 
load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2647:%.*]] = zext i16 [[TMP1637]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1638:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2648:%.*]] = zext i16 [[TMP1638]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2649:%.*]] = icmp slt i32 [[CONV2647]], [[CONV2648]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2649]], label [[IF_THEN2651:%.*]], label [[IF_END2652:%.*]] +// SIMD-ONLY0: if.then2651: +// SIMD-ONLY0-NEXT: [[TMP1639:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1639]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2652]] +// SIMD-ONLY0: if.end2652: +// SIMD-ONLY0-NEXT: [[TMP1640:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1640]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1641:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2653:%.*]] = zext i16 [[TMP1641]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1642:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2654:%.*]] = zext i16 [[TMP1642]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2655:%.*]] = icmp slt i32 [[CONV2653]], [[CONV2654]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2655]], label [[IF_THEN2657:%.*]], label [[IF_END2658:%.*]] +// SIMD-ONLY0: if.then2657: +// SIMD-ONLY0-NEXT: [[TMP1643:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1643]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2658]] +// SIMD-ONLY0: if.end2658: +// SIMD-ONLY0-NEXT: [[TMP1644:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1644]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1645:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2659:%.*]] = zext i16 [[TMP1645]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1646:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2660:%.*]] = zext i16 [[TMP1646]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2661:%.*]] = icmp eq i32 [[CONV2659]], [[CONV2660]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2661]], label 
[[IF_THEN2663:%.*]], label [[IF_END2664:%.*]] +// SIMD-ONLY0: if.then2663: +// SIMD-ONLY0-NEXT: [[TMP1647:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1647]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2664]] +// SIMD-ONLY0: if.end2664: +// SIMD-ONLY0-NEXT: [[TMP1648:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1648]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1649:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2665:%.*]] = zext i16 [[TMP1649]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1650:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2666:%.*]] = zext i16 [[TMP1650]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2667:%.*]] = icmp eq i32 [[CONV2665]], [[CONV2666]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2667]], label [[IF_THEN2669:%.*]], label [[IF_END2670:%.*]] +// SIMD-ONLY0: if.then2669: +// SIMD-ONLY0-NEXT: [[TMP1651:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1651]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2670]] +// SIMD-ONLY0: if.end2670: +// SIMD-ONLY0-NEXT: [[TMP1652:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2671:%.*]] = zext i16 [[TMP1652]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1653:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2672:%.*]] = zext i16 [[TMP1653]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2673:%.*]] = icmp sgt i32 [[CONV2671]], [[CONV2672]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2673]], label [[IF_THEN2675:%.*]], label [[IF_END2676:%.*]] +// SIMD-ONLY0: if.then2675: +// SIMD-ONLY0-NEXT: [[TMP1654:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1654]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2676]] +// SIMD-ONLY0: if.end2676: +// SIMD-ONLY0-NEXT: [[TMP1655:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1655]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1656:%.*]] = load i16, ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV2677:%.*]] = zext i16 [[TMP1656]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1657:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2678:%.*]] = zext i16 [[TMP1657]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2679:%.*]] = icmp sgt i32 [[CONV2677]], [[CONV2678]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2679]], label [[IF_THEN2681:%.*]], label [[IF_END2682:%.*]] +// SIMD-ONLY0: if.then2681: +// SIMD-ONLY0-NEXT: [[TMP1658:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1658]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2682]] +// SIMD-ONLY0: if.end2682: +// SIMD-ONLY0-NEXT: [[TMP1659:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1659]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1660:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2683:%.*]] = zext i16 [[TMP1660]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1661:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2684:%.*]] = zext i16 [[TMP1661]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2685:%.*]] = icmp slt i32 [[CONV2683]], [[CONV2684]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2685]], label [[IF_THEN2687:%.*]], label [[IF_END2688:%.*]] +// SIMD-ONLY0: if.then2687: +// SIMD-ONLY0-NEXT: [[TMP1662:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1662]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2688]] +// SIMD-ONLY0: if.end2688: +// SIMD-ONLY0-NEXT: [[TMP1663:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1663]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1664:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2689:%.*]] = zext i16 [[TMP1664]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1665:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2690:%.*]] = zext i16 [[TMP1665]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2691:%.*]] = icmp slt i32 [[CONV2689]], [[CONV2690]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2691]], label [[IF_THEN2693:%.*]], label 
[[IF_END2694:%.*]] +// SIMD-ONLY0: if.then2693: +// SIMD-ONLY0-NEXT: [[TMP1666:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1666]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2694]] +// SIMD-ONLY0: if.end2694: +// SIMD-ONLY0-NEXT: [[TMP1667:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1667]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1668:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2695:%.*]] = zext i16 [[TMP1668]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1669:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2696:%.*]] = zext i16 [[TMP1669]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2697:%.*]] = icmp eq i32 [[CONV2695]], [[CONV2696]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2697]], label [[IF_THEN2699:%.*]], label [[IF_END2700:%.*]] +// SIMD-ONLY0: if.then2699: +// SIMD-ONLY0-NEXT: [[TMP1670:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1670]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2700]] +// SIMD-ONLY0: if.end2700: +// SIMD-ONLY0-NEXT: [[TMP1671:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1671]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1672:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2701:%.*]] = zext i16 [[TMP1672]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1673:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2702:%.*]] = zext i16 [[TMP1673]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2703:%.*]] = icmp eq i32 [[CONV2701]], [[CONV2702]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2703]], label [[IF_THEN2705:%.*]], label [[IF_END2706:%.*]] +// SIMD-ONLY0: if.then2705: +// SIMD-ONLY0-NEXT: [[TMP1674:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1674]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2706]] +// SIMD-ONLY0: if.end2706: +// SIMD-ONLY0-NEXT: [[TMP1675:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP1675]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1676:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2707:%.*]] = zext i16 [[TMP1676]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1677:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2708:%.*]] = zext i16 [[TMP1677]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2709:%.*]] = icmp eq i32 [[CONV2707]], [[CONV2708]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2709]], label [[IF_THEN2711:%.*]], label [[IF_ELSE2712:%.*]] +// SIMD-ONLY0: if.then2711: +// SIMD-ONLY0-NEXT: [[TMP1678:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1678]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2713:%.*]] +// SIMD-ONLY0: if.else2712: +// SIMD-ONLY0-NEXT: [[TMP1679:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1679]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2713]] +// SIMD-ONLY0: if.end2713: +// SIMD-ONLY0-NEXT: [[TMP1680:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2714:%.*]] = zext i16 [[TMP1680]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1681:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2715:%.*]] = zext i16 [[TMP1681]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2716:%.*]] = icmp eq i32 [[CONV2714]], [[CONV2715]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2716]], label [[IF_THEN2718:%.*]], label [[IF_ELSE2719:%.*]] +// SIMD-ONLY0: if.then2718: +// SIMD-ONLY0-NEXT: [[TMP1682:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1682]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2720:%.*]] +// SIMD-ONLY0: if.else2719: +// SIMD-ONLY0-NEXT: [[TMP1683:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1683]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2720]] +// SIMD-ONLY0: if.end2720: +// SIMD-ONLY0-NEXT: [[TMP1684:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2721:%.*]] = zext i16 [[TMP1684]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP1685:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2722:%.*]] = zext i16 [[TMP1685]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2723:%.*]] = icmp eq i32 [[CONV2721]], [[CONV2722]] +// SIMD-ONLY0-NEXT: [[CONV2724:%.*]] = zext i1 [[CMP2723]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2725:%.*]] = trunc i32 [[CONV2724]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2725]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1686:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2726:%.*]] = icmp ne i16 [[TMP1686]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2726]], label [[IF_THEN2727:%.*]], label [[IF_END2728:%.*]] +// SIMD-ONLY0: if.then2727: +// SIMD-ONLY0-NEXT: [[TMP1687:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1687]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2728]] +// SIMD-ONLY0: if.end2728: +// SIMD-ONLY0-NEXT: [[TMP1688:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2729:%.*]] = zext i16 [[TMP1688]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1689:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2730:%.*]] = zext i16 [[TMP1689]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2731:%.*]] = icmp eq i32 [[CONV2729]], [[CONV2730]] +// SIMD-ONLY0-NEXT: [[CONV2732:%.*]] = zext i1 [[CMP2731]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2733:%.*]] = trunc i32 [[CONV2732]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2733]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1690:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2734:%.*]] = icmp ne i16 [[TMP1690]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2734]], label [[IF_THEN2735:%.*]], label [[IF_END2736:%.*]] +// SIMD-ONLY0: if.then2735: +// SIMD-ONLY0-NEXT: [[TMP1691:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1691]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2736]] +// SIMD-ONLY0: if.end2736: +// SIMD-ONLY0-NEXT: [[TMP1692:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV2737:%.*]] = zext i16 [[TMP1692]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1693:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2738:%.*]] = zext i16 [[TMP1693]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2739:%.*]] = icmp eq i32 [[CONV2737]], [[CONV2738]] +// SIMD-ONLY0-NEXT: [[CONV2740:%.*]] = zext i1 [[CMP2739]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2741:%.*]] = trunc i32 [[CONV2740]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2741]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1694:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2742:%.*]] = icmp ne i16 [[TMP1694]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2742]], label [[IF_THEN2743:%.*]], label [[IF_ELSE2744:%.*]] +// SIMD-ONLY0: if.then2743: +// SIMD-ONLY0-NEXT: [[TMP1695:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1695]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2745:%.*]] +// SIMD-ONLY0: if.else2744: +// SIMD-ONLY0-NEXT: [[TMP1696:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1696]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2745]] +// SIMD-ONLY0: if.end2745: +// SIMD-ONLY0-NEXT: [[TMP1697:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2746:%.*]] = zext i16 [[TMP1697]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1698:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2747:%.*]] = zext i16 [[TMP1698]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2748:%.*]] = icmp eq i32 [[CONV2746]], [[CONV2747]] +// SIMD-ONLY0-NEXT: [[CONV2749:%.*]] = zext i1 [[CMP2748]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2750:%.*]] = trunc i32 [[CONV2749]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2750]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1699:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2751:%.*]] = icmp ne i16 [[TMP1699]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2751]], label [[IF_THEN2752:%.*]], label [[IF_ELSE2753:%.*]] +// SIMD-ONLY0: if.then2752: +// SIMD-ONLY0-NEXT: 
[[TMP1700:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1700]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2754:%.*]] +// SIMD-ONLY0: if.else2753: +// SIMD-ONLY0-NEXT: [[TMP1701:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1701]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2754]] +// SIMD-ONLY0: if.end2754: +// SIMD-ONLY0-NEXT: [[TMP1702:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1702]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1703:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2755:%.*]] = zext i16 [[TMP1703]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1704:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2756:%.*]] = zext i16 [[TMP1704]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2757:%.*]] = icmp sgt i32 [[CONV2755]], [[CONV2756]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2757]], label [[IF_THEN2759:%.*]], label [[IF_END2760:%.*]] +// SIMD-ONLY0: if.then2759: +// SIMD-ONLY0-NEXT: [[TMP1705:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1705]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2760]] +// SIMD-ONLY0: if.end2760: +// SIMD-ONLY0-NEXT: [[TMP1706:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1706]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1707:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2761:%.*]] = zext i16 [[TMP1707]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1708:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2762:%.*]] = zext i16 [[TMP1708]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2763:%.*]] = icmp sgt i32 [[CONV2761]], [[CONV2762]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2763]], label [[IF_THEN2765:%.*]], label [[IF_END2766:%.*]] +// SIMD-ONLY0: if.then2765: +// SIMD-ONLY0-NEXT: [[TMP1709:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1709]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label 
[[IF_END2766]] +// SIMD-ONLY0: if.end2766: +// SIMD-ONLY0-NEXT: [[TMP1710:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1710]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1711:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2767:%.*]] = zext i16 [[TMP1711]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1712:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2768:%.*]] = zext i16 [[TMP1712]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2769:%.*]] = icmp slt i32 [[CONV2767]], [[CONV2768]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2769]], label [[IF_THEN2771:%.*]], label [[IF_END2772:%.*]] +// SIMD-ONLY0: if.then2771: +// SIMD-ONLY0-NEXT: [[TMP1713:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1713]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2772]] +// SIMD-ONLY0: if.end2772: +// SIMD-ONLY0-NEXT: [[TMP1714:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1714]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1715:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2773:%.*]] = zext i16 [[TMP1715]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1716:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2774:%.*]] = zext i16 [[TMP1716]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2775:%.*]] = icmp slt i32 [[CONV2773]], [[CONV2774]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2775]], label [[IF_THEN2777:%.*]], label [[IF_END2778:%.*]] +// SIMD-ONLY0: if.then2777: +// SIMD-ONLY0-NEXT: [[TMP1717:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1717]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2778]] +// SIMD-ONLY0: if.end2778: +// SIMD-ONLY0-NEXT: [[TMP1718:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1718]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1719:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2779:%.*]] = zext i16 [[TMP1719]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1720:%.*]] = load 
i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2780:%.*]] = zext i16 [[TMP1720]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2781:%.*]] = icmp eq i32 [[CONV2779]], [[CONV2780]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2781]], label [[IF_THEN2783:%.*]], label [[IF_END2784:%.*]] +// SIMD-ONLY0: if.then2783: +// SIMD-ONLY0-NEXT: [[TMP1721:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1721]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2784]] +// SIMD-ONLY0: if.end2784: +// SIMD-ONLY0-NEXT: [[TMP1722:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1722]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1723:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2785:%.*]] = zext i16 [[TMP1723]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1724:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2786:%.*]] = zext i16 [[TMP1724]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2787:%.*]] = icmp eq i32 [[CONV2785]], [[CONV2786]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2787]], label [[IF_THEN2789:%.*]], label [[IF_END2790:%.*]] +// SIMD-ONLY0: if.then2789: +// SIMD-ONLY0-NEXT: [[TMP1725:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1725]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2790]] +// SIMD-ONLY0: if.end2790: +// SIMD-ONLY0-NEXT: [[TMP1726:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2791:%.*]] = zext i16 [[TMP1726]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1727:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2792:%.*]] = zext i16 [[TMP1727]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2793:%.*]] = icmp sgt i32 [[CONV2791]], [[CONV2792]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2793]], label [[IF_THEN2795:%.*]], label [[IF_END2796:%.*]] +// SIMD-ONLY0: if.then2795: +// SIMD-ONLY0-NEXT: [[TMP1728:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1728]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2796]] +// SIMD-ONLY0: 
if.end2796: +// SIMD-ONLY0-NEXT: [[TMP1729:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1729]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1730:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2797:%.*]] = zext i16 [[TMP1730]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1731:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2798:%.*]] = zext i16 [[TMP1731]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2799:%.*]] = icmp sgt i32 [[CONV2797]], [[CONV2798]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2799]], label [[IF_THEN2801:%.*]], label [[IF_END2802:%.*]] +// SIMD-ONLY0: if.then2801: +// SIMD-ONLY0-NEXT: [[TMP1732:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1732]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2802]] +// SIMD-ONLY0: if.end2802: +// SIMD-ONLY0-NEXT: [[TMP1733:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1733]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1734:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2803:%.*]] = zext i16 [[TMP1734]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1735:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2804:%.*]] = zext i16 [[TMP1735]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2805:%.*]] = icmp slt i32 [[CONV2803]], [[CONV2804]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2805]], label [[IF_THEN2807:%.*]], label [[IF_END2808:%.*]] +// SIMD-ONLY0: if.then2807: +// SIMD-ONLY0-NEXT: [[TMP1736:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1736]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2808]] +// SIMD-ONLY0: if.end2808: +// SIMD-ONLY0-NEXT: [[TMP1737:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1737]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1738:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2809:%.*]] = zext i16 [[TMP1738]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1739:%.*]] = load i16, ptr [[USE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV2810:%.*]] = zext i16 [[TMP1739]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2811:%.*]] = icmp slt i32 [[CONV2809]], [[CONV2810]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2811]], label [[IF_THEN2813:%.*]], label [[IF_END2814:%.*]] +// SIMD-ONLY0: if.then2813: +// SIMD-ONLY0-NEXT: [[TMP1740:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1740]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2814]] +// SIMD-ONLY0: if.end2814: +// SIMD-ONLY0-NEXT: [[TMP1741:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1741]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1742:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2815:%.*]] = zext i16 [[TMP1742]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1743:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2816:%.*]] = zext i16 [[TMP1743]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2817:%.*]] = icmp eq i32 [[CONV2815]], [[CONV2816]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2817]], label [[IF_THEN2819:%.*]], label [[IF_END2820:%.*]] +// SIMD-ONLY0: if.then2819: +// SIMD-ONLY0-NEXT: [[TMP1744:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1744]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2820]] +// SIMD-ONLY0: if.end2820: +// SIMD-ONLY0-NEXT: [[TMP1745:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1745]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1746:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2821:%.*]] = zext i16 [[TMP1746]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1747:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2822:%.*]] = zext i16 [[TMP1747]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2823:%.*]] = icmp eq i32 [[CONV2821]], [[CONV2822]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2823]], label [[IF_THEN2825:%.*]], label [[IF_END2826:%.*]] +// SIMD-ONLY0: if.then2825: +// SIMD-ONLY0-NEXT: [[TMP1748:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store 
i16 [[TMP1748]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2826]] +// SIMD-ONLY0: if.end2826: +// SIMD-ONLY0-NEXT: [[TMP1749:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1749]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1750:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2827:%.*]] = zext i16 [[TMP1750]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1751:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2828:%.*]] = zext i16 [[TMP1751]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2829:%.*]] = icmp eq i32 [[CONV2827]], [[CONV2828]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2829]], label [[IF_THEN2831:%.*]], label [[IF_ELSE2832:%.*]] +// SIMD-ONLY0: if.then2831: +// SIMD-ONLY0-NEXT: [[TMP1752:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1752]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2833:%.*]] +// SIMD-ONLY0: if.else2832: +// SIMD-ONLY0-NEXT: [[TMP1753:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1753]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2833]] +// SIMD-ONLY0: if.end2833: +// SIMD-ONLY0-NEXT: [[TMP1754:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2834:%.*]] = zext i16 [[TMP1754]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1755:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2835:%.*]] = zext i16 [[TMP1755]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2836:%.*]] = icmp eq i32 [[CONV2834]], [[CONV2835]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2836]], label [[IF_THEN2838:%.*]], label [[IF_ELSE2839:%.*]] +// SIMD-ONLY0: if.then2838: +// SIMD-ONLY0-NEXT: [[TMP1756:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1756]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2840:%.*]] +// SIMD-ONLY0: if.else2839: +// SIMD-ONLY0-NEXT: [[TMP1757:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1757]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label 
[[IF_END2840]] +// SIMD-ONLY0: if.end2840: +// SIMD-ONLY0-NEXT: [[TMP1758:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2841:%.*]] = zext i16 [[TMP1758]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1759:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2842:%.*]] = zext i16 [[TMP1759]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2843:%.*]] = icmp eq i32 [[CONV2841]], [[CONV2842]] +// SIMD-ONLY0-NEXT: [[CONV2844:%.*]] = zext i1 [[CMP2843]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2845:%.*]] = trunc i32 [[CONV2844]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2845]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1760:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2846:%.*]] = icmp ne i16 [[TMP1760]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2846]], label [[IF_THEN2847:%.*]], label [[IF_END2848:%.*]] +// SIMD-ONLY0: if.then2847: +// SIMD-ONLY0-NEXT: [[TMP1761:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1761]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2848]] +// SIMD-ONLY0: if.end2848: +// SIMD-ONLY0-NEXT: [[TMP1762:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2849:%.*]] = zext i16 [[TMP1762]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1763:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2850:%.*]] = zext i16 [[TMP1763]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2851:%.*]] = icmp eq i32 [[CONV2849]], [[CONV2850]] +// SIMD-ONLY0-NEXT: [[CONV2852:%.*]] = zext i1 [[CMP2851]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2853:%.*]] = trunc i32 [[CONV2852]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2853]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1764:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2854:%.*]] = icmp ne i16 [[TMP1764]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2854]], label [[IF_THEN2855:%.*]], label [[IF_END2856:%.*]] +// SIMD-ONLY0: if.then2855: +// SIMD-ONLY0-NEXT: [[TMP1765:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP1765]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2856]] +// SIMD-ONLY0: if.end2856: +// SIMD-ONLY0-NEXT: [[TMP1766:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2857:%.*]] = zext i16 [[TMP1766]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1767:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2858:%.*]] = zext i16 [[TMP1767]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2859:%.*]] = icmp eq i32 [[CONV2857]], [[CONV2858]] +// SIMD-ONLY0-NEXT: [[CONV2860:%.*]] = zext i1 [[CMP2859]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2861:%.*]] = trunc i32 [[CONV2860]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2861]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1768:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TOBOOL2862:%.*]] = icmp ne i16 [[TMP1768]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2862]], label [[IF_THEN2863:%.*]], label [[IF_ELSE2864:%.*]] +// SIMD-ONLY0: if.then2863: +// SIMD-ONLY0-NEXT: [[TMP1769:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1769]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2865:%.*]] +// SIMD-ONLY0: if.else2864: +// SIMD-ONLY0-NEXT: [[TMP1770:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1770]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2865]] +// SIMD-ONLY0: if.end2865: +// SIMD-ONLY0-NEXT: [[TMP1771:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2866:%.*]] = zext i16 [[TMP1771]] to i32 +// SIMD-ONLY0-NEXT: [[TMP1772:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV2867:%.*]] = zext i16 [[TMP1772]] to i32 +// SIMD-ONLY0-NEXT: [[CMP2868:%.*]] = icmp eq i32 [[CONV2866]], [[CONV2867]] +// SIMD-ONLY0-NEXT: [[CONV2869:%.*]] = zext i1 [[CMP2868]] to i32 +// SIMD-ONLY0-NEXT: [[CONV2870:%.*]] = trunc i32 [[CONV2869]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV2870]], ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1773:%.*]] = load i16, ptr [[USR]], align 2 +// SIMD-ONLY0-NEXT: 
[[TOBOOL2871:%.*]] = icmp ne i16 [[TMP1773]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2871]], label [[IF_THEN2872:%.*]], label [[IF_ELSE2873:%.*]] +// SIMD-ONLY0: if.then2872: +// SIMD-ONLY0-NEXT: [[TMP1774:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1774]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2874:%.*]] +// SIMD-ONLY0: if.else2873: +// SIMD-ONLY0-NEXT: [[TMP1775:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP1775]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: br label [[IF_END2874]] +// SIMD-ONLY0: if.end2874: +// SIMD-ONLY0-NEXT: [[TMP1776:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1776]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1777:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1778:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2875:%.*]] = icmp sgt i32 [[TMP1777]], [[TMP1778]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2875]], label [[IF_THEN2877:%.*]], label [[IF_END2878:%.*]] +// SIMD-ONLY0: if.then2877: +// SIMD-ONLY0-NEXT: [[TMP1779:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1779]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2878]] +// SIMD-ONLY0: if.end2878: +// SIMD-ONLY0-NEXT: [[TMP1780:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1780]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1781:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1782:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2879:%.*]] = icmp sgt i32 [[TMP1781]], [[TMP1782]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2879]], label [[IF_THEN2881:%.*]], label [[IF_END2882:%.*]] +// SIMD-ONLY0: if.then2881: +// SIMD-ONLY0-NEXT: [[TMP1783:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1783]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2882]] +// SIMD-ONLY0: if.end2882: +// SIMD-ONLY0-NEXT: [[TMP1784:%.*]] = load i32, ptr 
[[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1784]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1785:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1786:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2883:%.*]] = icmp slt i32 [[TMP1785]], [[TMP1786]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2883]], label [[IF_THEN2885:%.*]], label [[IF_END2886:%.*]] +// SIMD-ONLY0: if.then2885: +// SIMD-ONLY0-NEXT: [[TMP1787:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1787]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2886]] +// SIMD-ONLY0: if.end2886: +// SIMD-ONLY0-NEXT: [[TMP1788:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1788]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1789:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1790:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2887:%.*]] = icmp slt i32 [[TMP1789]], [[TMP1790]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2887]], label [[IF_THEN2889:%.*]], label [[IF_END2890:%.*]] +// SIMD-ONLY0: if.then2889: +// SIMD-ONLY0-NEXT: [[TMP1791:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1791]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2890]] +// SIMD-ONLY0: if.end2890: +// SIMD-ONLY0-NEXT: [[TMP1792:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1792]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1793:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1794:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2891:%.*]] = icmp eq i32 [[TMP1793]], [[TMP1794]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2891]], label [[IF_THEN2893:%.*]], label [[IF_END2894:%.*]] +// SIMD-ONLY0: if.then2893: +// SIMD-ONLY0-NEXT: [[TMP1795:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1795]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2894]] +// SIMD-ONLY0: if.end2894: +// SIMD-ONLY0-NEXT: 
[[TMP1796:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1796]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1797:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1798:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2895:%.*]] = icmp eq i32 [[TMP1797]], [[TMP1798]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2895]], label [[IF_THEN2897:%.*]], label [[IF_END2898:%.*]] +// SIMD-ONLY0: if.then2897: +// SIMD-ONLY0-NEXT: [[TMP1799:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1799]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2898]] +// SIMD-ONLY0: if.end2898: +// SIMD-ONLY0-NEXT: [[TMP1800:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1801:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2899:%.*]] = icmp sgt i32 [[TMP1800]], [[TMP1801]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2899]], label [[IF_THEN2901:%.*]], label [[IF_END2902:%.*]] +// SIMD-ONLY0: if.then2901: +// SIMD-ONLY0-NEXT: [[TMP1802:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1802]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2902]] +// SIMD-ONLY0: if.end2902: +// SIMD-ONLY0-NEXT: [[TMP1803:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1803]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1804:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1805:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2903:%.*]] = icmp sgt i32 [[TMP1804]], [[TMP1805]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2903]], label [[IF_THEN2905:%.*]], label [[IF_END2906:%.*]] +// SIMD-ONLY0: if.then2905: +// SIMD-ONLY0-NEXT: [[TMP1806:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1806]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2906]] +// SIMD-ONLY0: if.end2906: +// SIMD-ONLY0-NEXT: [[TMP1807:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1807]], ptr [[IV]], 
align 4 +// SIMD-ONLY0-NEXT: [[TMP1808:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1809:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2907:%.*]] = icmp slt i32 [[TMP1808]], [[TMP1809]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2907]], label [[IF_THEN2909:%.*]], label [[IF_END2910:%.*]] +// SIMD-ONLY0: if.then2909: +// SIMD-ONLY0-NEXT: [[TMP1810:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1810]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2910]] +// SIMD-ONLY0: if.end2910: +// SIMD-ONLY0-NEXT: [[TMP1811:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1811]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1812:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1813:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2911:%.*]] = icmp slt i32 [[TMP1812]], [[TMP1813]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2911]], label [[IF_THEN2913:%.*]], label [[IF_END2914:%.*]] +// SIMD-ONLY0: if.then2913: +// SIMD-ONLY0-NEXT: [[TMP1814:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1814]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2914]] +// SIMD-ONLY0: if.end2914: +// SIMD-ONLY0-NEXT: [[TMP1815:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1815]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1816:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1817:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2915:%.*]] = icmp eq i32 [[TMP1816]], [[TMP1817]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2915]], label [[IF_THEN2917:%.*]], label [[IF_END2918:%.*]] +// SIMD-ONLY0: if.then2917: +// SIMD-ONLY0-NEXT: [[TMP1818:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1818]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2918]] +// SIMD-ONLY0: if.end2918: +// SIMD-ONLY0-NEXT: [[TMP1819:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 
[[TMP1819]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1820:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1821:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2919:%.*]] = icmp eq i32 [[TMP1820]], [[TMP1821]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2919]], label [[IF_THEN2921:%.*]], label [[IF_END2922:%.*]] +// SIMD-ONLY0: if.then2921: +// SIMD-ONLY0-NEXT: [[TMP1822:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1822]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2922]] +// SIMD-ONLY0: if.end2922: +// SIMD-ONLY0-NEXT: [[TMP1823:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1823]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1824:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1825:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2923:%.*]] = icmp eq i32 [[TMP1824]], [[TMP1825]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2923]], label [[IF_THEN2925:%.*]], label [[IF_ELSE2926:%.*]] +// SIMD-ONLY0: if.then2925: +// SIMD-ONLY0-NEXT: [[TMP1826:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1826]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2927:%.*]] +// SIMD-ONLY0: if.else2926: +// SIMD-ONLY0-NEXT: [[TMP1827:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1827]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2927]] +// SIMD-ONLY0: if.end2927: +// SIMD-ONLY0-NEXT: [[TMP1828:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1829:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2928:%.*]] = icmp eq i32 [[TMP1828]], [[TMP1829]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2928]], label [[IF_THEN2930:%.*]], label [[IF_ELSE2931:%.*]] +// SIMD-ONLY0: if.then2930: +// SIMD-ONLY0-NEXT: [[TMP1830:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1830]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2932:%.*]] +// SIMD-ONLY0: 
if.else2931: +// SIMD-ONLY0-NEXT: [[TMP1831:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1831]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2932]] +// SIMD-ONLY0: if.end2932: +// SIMD-ONLY0-NEXT: [[TMP1832:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1833:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2933:%.*]] = icmp eq i32 [[TMP1832]], [[TMP1833]] +// SIMD-ONLY0-NEXT: [[CONV2934:%.*]] = zext i1 [[CMP2933]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV2934]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1834:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL2935:%.*]] = icmp ne i32 [[TMP1834]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2935]], label [[IF_THEN2936:%.*]], label [[IF_END2937:%.*]] +// SIMD-ONLY0: if.then2936: +// SIMD-ONLY0-NEXT: [[TMP1835:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1835]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2937]] +// SIMD-ONLY0: if.end2937: +// SIMD-ONLY0-NEXT: [[TMP1836:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1837:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2938:%.*]] = icmp eq i32 [[TMP1836]], [[TMP1837]] +// SIMD-ONLY0-NEXT: [[CONV2939:%.*]] = zext i1 [[CMP2938]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV2939]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1838:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL2940:%.*]] = icmp ne i32 [[TMP1838]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2940]], label [[IF_THEN2941:%.*]], label [[IF_END2942:%.*]] +// SIMD-ONLY0: if.then2941: +// SIMD-ONLY0-NEXT: [[TMP1839:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1839]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2942]] +// SIMD-ONLY0: if.end2942: +// SIMD-ONLY0-NEXT: [[TMP1840:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1841:%.*]] = load i32, ptr [[IE]], align 4 +// 
SIMD-ONLY0-NEXT: [[CMP2943:%.*]] = icmp eq i32 [[TMP1840]], [[TMP1841]] +// SIMD-ONLY0-NEXT: [[CONV2944:%.*]] = zext i1 [[CMP2943]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV2944]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1842:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL2945:%.*]] = icmp ne i32 [[TMP1842]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2945]], label [[IF_THEN2946:%.*]], label [[IF_ELSE2947:%.*]] +// SIMD-ONLY0: if.then2946: +// SIMD-ONLY0-NEXT: [[TMP1843:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1843]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2948:%.*]] +// SIMD-ONLY0: if.else2947: +// SIMD-ONLY0-NEXT: [[TMP1844:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1844]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2948]] +// SIMD-ONLY0: if.end2948: +// SIMD-ONLY0-NEXT: [[TMP1845:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1846:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2949:%.*]] = icmp eq i32 [[TMP1845]], [[TMP1846]] +// SIMD-ONLY0-NEXT: [[CONV2950:%.*]] = zext i1 [[CMP2949]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV2950]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1847:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL2951:%.*]] = icmp ne i32 [[TMP1847]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL2951]], label [[IF_THEN2952:%.*]], label [[IF_ELSE2953:%.*]] +// SIMD-ONLY0: if.then2952: +// SIMD-ONLY0-NEXT: [[TMP1848:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1848]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2954:%.*]] +// SIMD-ONLY0: if.else2953: +// SIMD-ONLY0-NEXT: [[TMP1849:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1849]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2954]] +// SIMD-ONLY0: if.end2954: +// SIMD-ONLY0-NEXT: [[TMP1850:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: 
store i32 [[TMP1850]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1851:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1852:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2955:%.*]] = icmp sgt i32 [[TMP1851]], [[TMP1852]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2955]], label [[IF_THEN2957:%.*]], label [[IF_END2958:%.*]] +// SIMD-ONLY0: if.then2957: +// SIMD-ONLY0-NEXT: [[TMP1853:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1853]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2958]] +// SIMD-ONLY0: if.end2958: +// SIMD-ONLY0-NEXT: [[TMP1854:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1854]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1855:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1856:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2959:%.*]] = icmp sgt i32 [[TMP1855]], [[TMP1856]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2959]], label [[IF_THEN2961:%.*]], label [[IF_END2962:%.*]] +// SIMD-ONLY0: if.then2961: +// SIMD-ONLY0-NEXT: [[TMP1857:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1857]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2962]] +// SIMD-ONLY0: if.end2962: +// SIMD-ONLY0-NEXT: [[TMP1858:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1858]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1859:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1860:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2963:%.*]] = icmp slt i32 [[TMP1859]], [[TMP1860]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2963]], label [[IF_THEN2965:%.*]], label [[IF_END2966:%.*]] +// SIMD-ONLY0: if.then2965: +// SIMD-ONLY0-NEXT: [[TMP1861:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1861]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2966]] +// SIMD-ONLY0: if.end2966: +// SIMD-ONLY0-NEXT: [[TMP1862:%.*]] = load i32, ptr [[IX]], 
align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1862]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1863:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1864:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2967:%.*]] = icmp slt i32 [[TMP1863]], [[TMP1864]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2967]], label [[IF_THEN2969:%.*]], label [[IF_END2970:%.*]] +// SIMD-ONLY0: if.then2969: +// SIMD-ONLY0-NEXT: [[TMP1865:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1865]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2970]] +// SIMD-ONLY0: if.end2970: +// SIMD-ONLY0-NEXT: [[TMP1866:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1866]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1867:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1868:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2971:%.*]] = icmp eq i32 [[TMP1867]], [[TMP1868]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2971]], label [[IF_THEN2973:%.*]], label [[IF_END2974:%.*]] +// SIMD-ONLY0: if.then2973: +// SIMD-ONLY0-NEXT: [[TMP1869:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1869]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2974]] +// SIMD-ONLY0: if.end2974: +// SIMD-ONLY0-NEXT: [[TMP1870:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1870]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1871:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1872:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2975:%.*]] = icmp eq i32 [[TMP1871]], [[TMP1872]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2975]], label [[IF_THEN2977:%.*]], label [[IF_END2978:%.*]] +// SIMD-ONLY0: if.then2977: +// SIMD-ONLY0-NEXT: [[TMP1873:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1873]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2978]] +// SIMD-ONLY0: if.end2978: +// SIMD-ONLY0-NEXT: [[TMP1874:%.*]] 
= load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1875:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2979:%.*]] = icmp sgt i32 [[TMP1874]], [[TMP1875]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2979]], label [[IF_THEN2981:%.*]], label [[IF_END2982:%.*]] +// SIMD-ONLY0: if.then2981: +// SIMD-ONLY0-NEXT: [[TMP1876:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1876]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2982]] +// SIMD-ONLY0: if.end2982: +// SIMD-ONLY0-NEXT: [[TMP1877:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1877]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1878:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1879:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2983:%.*]] = icmp sgt i32 [[TMP1878]], [[TMP1879]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2983]], label [[IF_THEN2985:%.*]], label [[IF_END2986:%.*]] +// SIMD-ONLY0: if.then2985: +// SIMD-ONLY0-NEXT: [[TMP1880:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1880]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2986]] +// SIMD-ONLY0: if.end2986: +// SIMD-ONLY0-NEXT: [[TMP1881:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1881]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1882:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1883:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2987:%.*]] = icmp slt i32 [[TMP1882]], [[TMP1883]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2987]], label [[IF_THEN2989:%.*]], label [[IF_END2990:%.*]] +// SIMD-ONLY0: if.then2989: +// SIMD-ONLY0-NEXT: [[TMP1884:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1884]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2990]] +// SIMD-ONLY0: if.end2990: +// SIMD-ONLY0-NEXT: [[TMP1885:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1885]], ptr [[IV]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP1886:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1887:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2991:%.*]] = icmp slt i32 [[TMP1886]], [[TMP1887]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2991]], label [[IF_THEN2993:%.*]], label [[IF_END2994:%.*]] +// SIMD-ONLY0: if.then2993: +// SIMD-ONLY0-NEXT: [[TMP1888:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1888]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2994]] +// SIMD-ONLY0: if.end2994: +// SIMD-ONLY0-NEXT: [[TMP1889:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1889]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1890:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1891:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2995:%.*]] = icmp eq i32 [[TMP1890]], [[TMP1891]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2995]], label [[IF_THEN2997:%.*]], label [[IF_END2998:%.*]] +// SIMD-ONLY0: if.then2997: +// SIMD-ONLY0-NEXT: [[TMP1892:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1892]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END2998]] +// SIMD-ONLY0: if.end2998: +// SIMD-ONLY0-NEXT: [[TMP1893:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1893]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1894:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1895:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP2999:%.*]] = icmp eq i32 [[TMP1894]], [[TMP1895]] +// SIMD-ONLY0-NEXT: br i1 [[CMP2999]], label [[IF_THEN3001:%.*]], label [[IF_END3002:%.*]] +// SIMD-ONLY0: if.then3001: +// SIMD-ONLY0-NEXT: [[TMP1896:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1896]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3002]] +// SIMD-ONLY0: if.end3002: +// SIMD-ONLY0-NEXT: [[TMP1897:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1897]], 
ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1898:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1899:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3003:%.*]] = icmp eq i32 [[TMP1898]], [[TMP1899]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3003]], label [[IF_THEN3005:%.*]], label [[IF_ELSE3006:%.*]] +// SIMD-ONLY0: if.then3005: +// SIMD-ONLY0-NEXT: [[TMP1900:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1900]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3007:%.*]] +// SIMD-ONLY0: if.else3006: +// SIMD-ONLY0-NEXT: [[TMP1901:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1901]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3007]] +// SIMD-ONLY0: if.end3007: +// SIMD-ONLY0-NEXT: [[TMP1902:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1903:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3008:%.*]] = icmp eq i32 [[TMP1902]], [[TMP1903]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3008]], label [[IF_THEN3010:%.*]], label [[IF_ELSE3011:%.*]] +// SIMD-ONLY0: if.then3010: +// SIMD-ONLY0-NEXT: [[TMP1904:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1904]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3012:%.*]] +// SIMD-ONLY0: if.else3011: +// SIMD-ONLY0-NEXT: [[TMP1905:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1905]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3012]] +// SIMD-ONLY0: if.end3012: +// SIMD-ONLY0-NEXT: [[TMP1906:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1907:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3013:%.*]] = icmp eq i32 [[TMP1906]], [[TMP1907]] +// SIMD-ONLY0-NEXT: [[CONV3014:%.*]] = zext i1 [[CMP3013]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3014]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1908:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3015:%.*]] = icmp ne i32 
[[TMP1908]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3015]], label [[IF_THEN3016:%.*]], label [[IF_END3017:%.*]] +// SIMD-ONLY0: if.then3016: +// SIMD-ONLY0-NEXT: [[TMP1909:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1909]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3017]] +// SIMD-ONLY0: if.end3017: +// SIMD-ONLY0-NEXT: [[TMP1910:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1911:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3018:%.*]] = icmp eq i32 [[TMP1910]], [[TMP1911]] +// SIMD-ONLY0-NEXT: [[CONV3019:%.*]] = zext i1 [[CMP3018]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3019]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1912:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3020:%.*]] = icmp ne i32 [[TMP1912]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3020]], label [[IF_THEN3021:%.*]], label [[IF_END3022:%.*]] +// SIMD-ONLY0: if.then3021: +// SIMD-ONLY0-NEXT: [[TMP1913:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1913]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3022]] +// SIMD-ONLY0: if.end3022: +// SIMD-ONLY0-NEXT: [[TMP1914:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1915:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3023:%.*]] = icmp eq i32 [[TMP1914]], [[TMP1915]] +// SIMD-ONLY0-NEXT: [[CONV3024:%.*]] = zext i1 [[CMP3023]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3024]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1916:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3025:%.*]] = icmp ne i32 [[TMP1916]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3025]], label [[IF_THEN3026:%.*]], label [[IF_ELSE3027:%.*]] +// SIMD-ONLY0: if.then3026: +// SIMD-ONLY0-NEXT: [[TMP1917:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1917]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3028:%.*]] +// SIMD-ONLY0: if.else3027: +// SIMD-ONLY0-NEXT: 
[[TMP1918:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1918]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3028]] +// SIMD-ONLY0: if.end3028: +// SIMD-ONLY0-NEXT: [[TMP1919:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1920:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3029:%.*]] = icmp eq i32 [[TMP1919]], [[TMP1920]] +// SIMD-ONLY0-NEXT: [[CONV3030:%.*]] = zext i1 [[CMP3029]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3030]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1921:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3031:%.*]] = icmp ne i32 [[TMP1921]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3031]], label [[IF_THEN3032:%.*]], label [[IF_ELSE3033:%.*]] +// SIMD-ONLY0: if.then3032: +// SIMD-ONLY0-NEXT: [[TMP1922:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1922]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3034:%.*]] +// SIMD-ONLY0: if.else3033: +// SIMD-ONLY0-NEXT: [[TMP1923:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1923]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3034]] +// SIMD-ONLY0: if.end3034: +// SIMD-ONLY0-NEXT: [[TMP1924:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1924]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1925:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1926:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3035:%.*]] = icmp sgt i32 [[TMP1925]], [[TMP1926]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3035]], label [[IF_THEN3037:%.*]], label [[IF_END3038:%.*]] +// SIMD-ONLY0: if.then3037: +// SIMD-ONLY0-NEXT: [[TMP1927:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1927]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3038]] +// SIMD-ONLY0: if.end3038: +// SIMD-ONLY0-NEXT: [[TMP1928:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1928]], 
ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1929:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1930:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3039:%.*]] = icmp sgt i32 [[TMP1929]], [[TMP1930]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3039]], label [[IF_THEN3041:%.*]], label [[IF_END3042:%.*]] +// SIMD-ONLY0: if.then3041: +// SIMD-ONLY0-NEXT: [[TMP1931:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1931]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3042]] +// SIMD-ONLY0: if.end3042: +// SIMD-ONLY0-NEXT: [[TMP1932:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1932]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1933:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1934:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3043:%.*]] = icmp slt i32 [[TMP1933]], [[TMP1934]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3043]], label [[IF_THEN3045:%.*]], label [[IF_END3046:%.*]] +// SIMD-ONLY0: if.then3045: +// SIMD-ONLY0-NEXT: [[TMP1935:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1935]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3046]] +// SIMD-ONLY0: if.end3046: +// SIMD-ONLY0-NEXT: [[TMP1936:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1936]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1937:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1938:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3047:%.*]] = icmp slt i32 [[TMP1937]], [[TMP1938]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3047]], label [[IF_THEN3049:%.*]], label [[IF_END3050:%.*]] +// SIMD-ONLY0: if.then3049: +// SIMD-ONLY0-NEXT: [[TMP1939:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1939]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3050]] +// SIMD-ONLY0: if.end3050: +// SIMD-ONLY0-NEXT: [[TMP1940:%.*]] = load i32, ptr [[IX]], align 4 +// 
SIMD-ONLY0-NEXT: store i32 [[TMP1940]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1941:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1942:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3051:%.*]] = icmp eq i32 [[TMP1941]], [[TMP1942]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3051]], label [[IF_THEN3053:%.*]], label [[IF_END3054:%.*]] +// SIMD-ONLY0: if.then3053: +// SIMD-ONLY0-NEXT: [[TMP1943:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1943]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3054]] +// SIMD-ONLY0: if.end3054: +// SIMD-ONLY0-NEXT: [[TMP1944:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1944]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1945:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1946:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3055:%.*]] = icmp eq i32 [[TMP1945]], [[TMP1946]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3055]], label [[IF_THEN3057:%.*]], label [[IF_END3058:%.*]] +// SIMD-ONLY0: if.then3057: +// SIMD-ONLY0-NEXT: [[TMP1947:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1947]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3058]] +// SIMD-ONLY0: if.end3058: +// SIMD-ONLY0-NEXT: [[TMP1948:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1949:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3059:%.*]] = icmp sgt i32 [[TMP1948]], [[TMP1949]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3059]], label [[IF_THEN3061:%.*]], label [[IF_END3062:%.*]] +// SIMD-ONLY0: if.then3061: +// SIMD-ONLY0-NEXT: [[TMP1950:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1950]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3062]] +// SIMD-ONLY0: if.end3062: +// SIMD-ONLY0-NEXT: [[TMP1951:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1951]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1952:%.*]] = load i32, 
ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1953:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3063:%.*]] = icmp sgt i32 [[TMP1952]], [[TMP1953]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3063]], label [[IF_THEN3065:%.*]], label [[IF_END3066:%.*]] +// SIMD-ONLY0: if.then3065: +// SIMD-ONLY0-NEXT: [[TMP1954:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1954]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3066]] +// SIMD-ONLY0: if.end3066: +// SIMD-ONLY0-NEXT: [[TMP1955:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1955]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1956:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1957:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3067:%.*]] = icmp slt i32 [[TMP1956]], [[TMP1957]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3067]], label [[IF_THEN3069:%.*]], label [[IF_END3070:%.*]] +// SIMD-ONLY0: if.then3069: +// SIMD-ONLY0-NEXT: [[TMP1958:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1958]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3070]] +// SIMD-ONLY0: if.end3070: +// SIMD-ONLY0-NEXT: [[TMP1959:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1959]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1960:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1961:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3071:%.*]] = icmp slt i32 [[TMP1960]], [[TMP1961]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3071]], label [[IF_THEN3073:%.*]], label [[IF_END3074:%.*]] +// SIMD-ONLY0: if.then3073: +// SIMD-ONLY0-NEXT: [[TMP1962:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1962]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3074]] +// SIMD-ONLY0: if.end3074: +// SIMD-ONLY0-NEXT: [[TMP1963:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1963]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP1964:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1965:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3075:%.*]] = icmp eq i32 [[TMP1964]], [[TMP1965]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3075]], label [[IF_THEN3077:%.*]], label [[IF_END3078:%.*]] +// SIMD-ONLY0: if.then3077: +// SIMD-ONLY0-NEXT: [[TMP1966:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1966]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3078]] +// SIMD-ONLY0: if.end3078: +// SIMD-ONLY0-NEXT: [[TMP1967:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1967]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1968:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1969:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3079:%.*]] = icmp eq i32 [[TMP1968]], [[TMP1969]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3079]], label [[IF_THEN3081:%.*]], label [[IF_END3082:%.*]] +// SIMD-ONLY0: if.then3081: +// SIMD-ONLY0-NEXT: [[TMP1970:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1970]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3082]] +// SIMD-ONLY0: if.end3082: +// SIMD-ONLY0-NEXT: [[TMP1971:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1971]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1972:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1973:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3083:%.*]] = icmp eq i32 [[TMP1972]], [[TMP1973]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3083]], label [[IF_THEN3085:%.*]], label [[IF_ELSE3086:%.*]] +// SIMD-ONLY0: if.then3085: +// SIMD-ONLY0-NEXT: [[TMP1974:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1974]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3087:%.*]] +// SIMD-ONLY0: if.else3086: +// SIMD-ONLY0-NEXT: [[TMP1975:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1975]], ptr [[IV]], 
align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3087]] +// SIMD-ONLY0: if.end3087: +// SIMD-ONLY0-NEXT: [[TMP1976:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1977:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3088:%.*]] = icmp eq i32 [[TMP1976]], [[TMP1977]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3088]], label [[IF_THEN3090:%.*]], label [[IF_ELSE3091:%.*]] +// SIMD-ONLY0: if.then3090: +// SIMD-ONLY0-NEXT: [[TMP1978:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1978]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3092:%.*]] +// SIMD-ONLY0: if.else3091: +// SIMD-ONLY0-NEXT: [[TMP1979:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1979]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3092]] +// SIMD-ONLY0: if.end3092: +// SIMD-ONLY0-NEXT: [[TMP1980:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1981:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3093:%.*]] = icmp eq i32 [[TMP1980]], [[TMP1981]] +// SIMD-ONLY0-NEXT: [[CONV3094:%.*]] = zext i1 [[CMP3093]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3094]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1982:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3095:%.*]] = icmp ne i32 [[TMP1982]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3095]], label [[IF_THEN3096:%.*]], label [[IF_END3097:%.*]] +// SIMD-ONLY0: if.then3096: +// SIMD-ONLY0-NEXT: [[TMP1983:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1983]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3097]] +// SIMD-ONLY0: if.end3097: +// SIMD-ONLY0-NEXT: [[TMP1984:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1985:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3098:%.*]] = icmp eq i32 [[TMP1984]], [[TMP1985]] +// SIMD-ONLY0-NEXT: [[CONV3099:%.*]] = zext i1 [[CMP3098]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3099]], ptr [[IR]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP1986:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3100:%.*]] = icmp ne i32 [[TMP1986]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3100]], label [[IF_THEN3101:%.*]], label [[IF_END3102:%.*]] +// SIMD-ONLY0: if.then3101: +// SIMD-ONLY0-NEXT: [[TMP1987:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1987]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3102]] +// SIMD-ONLY0: if.end3102: +// SIMD-ONLY0-NEXT: [[TMP1988:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1989:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3103:%.*]] = icmp eq i32 [[TMP1988]], [[TMP1989]] +// SIMD-ONLY0-NEXT: [[CONV3104:%.*]] = zext i1 [[CMP3103]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3104]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1990:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3105:%.*]] = icmp ne i32 [[TMP1990]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3105]], label [[IF_THEN3106:%.*]], label [[IF_ELSE3107:%.*]] +// SIMD-ONLY0: if.then3106: +// SIMD-ONLY0-NEXT: [[TMP1991:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1991]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3108:%.*]] +// SIMD-ONLY0: if.else3107: +// SIMD-ONLY0-NEXT: [[TMP1992:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1992]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3108]] +// SIMD-ONLY0: if.end3108: +// SIMD-ONLY0-NEXT: [[TMP1993:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1994:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3109:%.*]] = icmp eq i32 [[TMP1993]], [[TMP1994]] +// SIMD-ONLY0-NEXT: [[CONV3110:%.*]] = zext i1 [[CMP3109]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3110]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1995:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3111:%.*]] = icmp ne i32 [[TMP1995]], 0 +// SIMD-ONLY0-NEXT: br 
i1 [[TOBOOL3111]], label [[IF_THEN3112:%.*]], label [[IF_ELSE3113:%.*]] +// SIMD-ONLY0: if.then3112: +// SIMD-ONLY0-NEXT: [[TMP1996:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1996]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3114:%.*]] +// SIMD-ONLY0: if.else3113: +// SIMD-ONLY0-NEXT: [[TMP1997:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1997]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3114]] +// SIMD-ONLY0: if.end3114: +// SIMD-ONLY0-NEXT: [[TMP1998:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP1998]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1999:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2000:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3115:%.*]] = icmp sgt i32 [[TMP1999]], [[TMP2000]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3115]], label [[IF_THEN3117:%.*]], label [[IF_END3118:%.*]] +// SIMD-ONLY0: if.then3117: +// SIMD-ONLY0-NEXT: [[TMP2001:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2001]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3118]] +// SIMD-ONLY0: if.end3118: +// SIMD-ONLY0-NEXT: [[TMP2002:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2002]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2003:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2004:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3119:%.*]] = icmp sgt i32 [[TMP2003]], [[TMP2004]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3119]], label [[IF_THEN3121:%.*]], label [[IF_END3122:%.*]] +// SIMD-ONLY0: if.then3121: +// SIMD-ONLY0-NEXT: [[TMP2005:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2005]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3122]] +// SIMD-ONLY0: if.end3122: +// SIMD-ONLY0-NEXT: [[TMP2006:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2006]], ptr [[IV]], align 
4 +// SIMD-ONLY0-NEXT: [[TMP2007:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2008:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3123:%.*]] = icmp slt i32 [[TMP2007]], [[TMP2008]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3123]], label [[IF_THEN3125:%.*]], label [[IF_END3126:%.*]] +// SIMD-ONLY0: if.then3125: +// SIMD-ONLY0-NEXT: [[TMP2009:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2009]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3126]] +// SIMD-ONLY0: if.end3126: +// SIMD-ONLY0-NEXT: [[TMP2010:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2010]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2011:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2012:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3127:%.*]] = icmp slt i32 [[TMP2011]], [[TMP2012]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3127]], label [[IF_THEN3129:%.*]], label [[IF_END3130:%.*]] +// SIMD-ONLY0: if.then3129: +// SIMD-ONLY0-NEXT: [[TMP2013:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2013]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3130]] +// SIMD-ONLY0: if.end3130: +// SIMD-ONLY0-NEXT: [[TMP2014:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2014]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2015:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2016:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3131:%.*]] = icmp eq i32 [[TMP2015]], [[TMP2016]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3131]], label [[IF_THEN3133:%.*]], label [[IF_END3134:%.*]] +// SIMD-ONLY0: if.then3133: +// SIMD-ONLY0-NEXT: [[TMP2017:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2017]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3134]] +// SIMD-ONLY0: if.end3134: +// SIMD-ONLY0-NEXT: [[TMP2018:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 
[[TMP2018]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2019:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2020:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3135:%.*]] = icmp eq i32 [[TMP2019]], [[TMP2020]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3135]], label [[IF_THEN3137:%.*]], label [[IF_END3138:%.*]] +// SIMD-ONLY0: if.then3137: +// SIMD-ONLY0-NEXT: [[TMP2021:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2021]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3138]] +// SIMD-ONLY0: if.end3138: +// SIMD-ONLY0-NEXT: [[TMP2022:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2023:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3139:%.*]] = icmp sgt i32 [[TMP2022]], [[TMP2023]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3139]], label [[IF_THEN3141:%.*]], label [[IF_END3142:%.*]] +// SIMD-ONLY0: if.then3141: +// SIMD-ONLY0-NEXT: [[TMP2024:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2024]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3142]] +// SIMD-ONLY0: if.end3142: +// SIMD-ONLY0-NEXT: [[TMP2025:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2025]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2026:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2027:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3143:%.*]] = icmp sgt i32 [[TMP2026]], [[TMP2027]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3143]], label [[IF_THEN3145:%.*]], label [[IF_END3146:%.*]] +// SIMD-ONLY0: if.then3145: +// SIMD-ONLY0-NEXT: [[TMP2028:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2028]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3146]] +// SIMD-ONLY0: if.end3146: +// SIMD-ONLY0-NEXT: [[TMP2029:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2029]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2030:%.*]] = load i32, ptr [[IE]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP2031:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3147:%.*]] = icmp slt i32 [[TMP2030]], [[TMP2031]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3147]], label [[IF_THEN3149:%.*]], label [[IF_END3150:%.*]] +// SIMD-ONLY0: if.then3149: +// SIMD-ONLY0-NEXT: [[TMP2032:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2032]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3150]] +// SIMD-ONLY0: if.end3150: +// SIMD-ONLY0-NEXT: [[TMP2033:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2033]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2034:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2035:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3151:%.*]] = icmp slt i32 [[TMP2034]], [[TMP2035]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3151]], label [[IF_THEN3153:%.*]], label [[IF_END3154:%.*]] +// SIMD-ONLY0: if.then3153: +// SIMD-ONLY0-NEXT: [[TMP2036:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2036]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3154]] +// SIMD-ONLY0: if.end3154: +// SIMD-ONLY0-NEXT: [[TMP2037:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2037]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2038:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2039:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3155:%.*]] = icmp eq i32 [[TMP2038]], [[TMP2039]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3155]], label [[IF_THEN3157:%.*]], label [[IF_END3158:%.*]] +// SIMD-ONLY0: if.then3157: +// SIMD-ONLY0-NEXT: [[TMP2040:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2040]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3158]] +// SIMD-ONLY0: if.end3158: +// SIMD-ONLY0-NEXT: [[TMP2041:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2041]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2042:%.*]] = load i32, 
ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2043:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3159:%.*]] = icmp eq i32 [[TMP2042]], [[TMP2043]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3159]], label [[IF_THEN3161:%.*]], label [[IF_END3162:%.*]] +// SIMD-ONLY0: if.then3161: +// SIMD-ONLY0-NEXT: [[TMP2044:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2044]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3162]] +// SIMD-ONLY0: if.end3162: +// SIMD-ONLY0-NEXT: [[TMP2045:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2045]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2046:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2047:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3163:%.*]] = icmp eq i32 [[TMP2046]], [[TMP2047]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3163]], label [[IF_THEN3165:%.*]], label [[IF_ELSE3166:%.*]] +// SIMD-ONLY0: if.then3165: +// SIMD-ONLY0-NEXT: [[TMP2048:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2048]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3167:%.*]] +// SIMD-ONLY0: if.else3166: +// SIMD-ONLY0-NEXT: [[TMP2049:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2049]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3167]] +// SIMD-ONLY0: if.end3167: +// SIMD-ONLY0-NEXT: [[TMP2050:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2051:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3168:%.*]] = icmp eq i32 [[TMP2050]], [[TMP2051]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3168]], label [[IF_THEN3170:%.*]], label [[IF_ELSE3171:%.*]] +// SIMD-ONLY0: if.then3170: +// SIMD-ONLY0-NEXT: [[TMP2052:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2052]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3172:%.*]] +// SIMD-ONLY0: if.else3171: +// SIMD-ONLY0-NEXT: [[TMP2053:%.*]] = load i32, ptr [[IX]], align 4 +// 
SIMD-ONLY0-NEXT: store i32 [[TMP2053]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3172]] +// SIMD-ONLY0: if.end3172: +// SIMD-ONLY0-NEXT: [[TMP2054:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2055:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3173:%.*]] = icmp eq i32 [[TMP2054]], [[TMP2055]] +// SIMD-ONLY0-NEXT: [[CONV3174:%.*]] = zext i1 [[CMP3173]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3174]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2056:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3175:%.*]] = icmp ne i32 [[TMP2056]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3175]], label [[IF_THEN3176:%.*]], label [[IF_END3177:%.*]] +// SIMD-ONLY0: if.then3176: +// SIMD-ONLY0-NEXT: [[TMP2057:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2057]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3177]] +// SIMD-ONLY0: if.end3177: +// SIMD-ONLY0-NEXT: [[TMP2058:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2059:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3178:%.*]] = icmp eq i32 [[TMP2058]], [[TMP2059]] +// SIMD-ONLY0-NEXT: [[CONV3179:%.*]] = zext i1 [[CMP3178]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3179]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2060:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3180:%.*]] = icmp ne i32 [[TMP2060]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3180]], label [[IF_THEN3181:%.*]], label [[IF_END3182:%.*]] +// SIMD-ONLY0: if.then3181: +// SIMD-ONLY0-NEXT: [[TMP2061:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2061]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3182]] +// SIMD-ONLY0: if.end3182: +// SIMD-ONLY0-NEXT: [[TMP2062:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2063:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3183:%.*]] = icmp eq i32 [[TMP2062]], [[TMP2063]] +// SIMD-ONLY0-NEXT: 
[[CONV3184:%.*]] = zext i1 [[CMP3183]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3184]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2064:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3185:%.*]] = icmp ne i32 [[TMP2064]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3185]], label [[IF_THEN3186:%.*]], label [[IF_ELSE3187:%.*]] +// SIMD-ONLY0: if.then3186: +// SIMD-ONLY0-NEXT: [[TMP2065:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2065]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3188:%.*]] +// SIMD-ONLY0: if.else3187: +// SIMD-ONLY0-NEXT: [[TMP2066:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2066]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3188]] +// SIMD-ONLY0: if.end3188: +// SIMD-ONLY0-NEXT: [[TMP2067:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2068:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3189:%.*]] = icmp eq i32 [[TMP2067]], [[TMP2068]] +// SIMD-ONLY0-NEXT: [[CONV3190:%.*]] = zext i1 [[CMP3189]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3190]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2069:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3191:%.*]] = icmp ne i32 [[TMP2069]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3191]], label [[IF_THEN3192:%.*]], label [[IF_ELSE3193:%.*]] +// SIMD-ONLY0: if.then3192: +// SIMD-ONLY0-NEXT: [[TMP2070:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2070]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3194:%.*]] +// SIMD-ONLY0: if.else3193: +// SIMD-ONLY0-NEXT: [[TMP2071:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2071]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3194]] +// SIMD-ONLY0: if.end3194: +// SIMD-ONLY0-NEXT: [[TMP2072:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2072]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2073:%.*]] = load i32, 
ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2074:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3195:%.*]] = icmp sgt i32 [[TMP2073]], [[TMP2074]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3195]], label [[IF_THEN3197:%.*]], label [[IF_END3198:%.*]] +// SIMD-ONLY0: if.then3197: +// SIMD-ONLY0-NEXT: [[TMP2075:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2075]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3198]] +// SIMD-ONLY0: if.end3198: +// SIMD-ONLY0-NEXT: [[TMP2076:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2076]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2077:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2078:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3199:%.*]] = icmp sgt i32 [[TMP2077]], [[TMP2078]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3199]], label [[IF_THEN3201:%.*]], label [[IF_END3202:%.*]] +// SIMD-ONLY0: if.then3201: +// SIMD-ONLY0-NEXT: [[TMP2079:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2079]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3202]] +// SIMD-ONLY0: if.end3202: +// SIMD-ONLY0-NEXT: [[TMP2080:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2080]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2081:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2082:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3203:%.*]] = icmp slt i32 [[TMP2081]], [[TMP2082]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3203]], label [[IF_THEN3205:%.*]], label [[IF_END3206:%.*]] +// SIMD-ONLY0: if.then3205: +// SIMD-ONLY0-NEXT: [[TMP2083:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2083]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3206]] +// SIMD-ONLY0: if.end3206: +// SIMD-ONLY0-NEXT: [[TMP2084:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2084]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP2085:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2086:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3207:%.*]] = icmp slt i32 [[TMP2085]], [[TMP2086]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3207]], label [[IF_THEN3209:%.*]], label [[IF_END3210:%.*]] +// SIMD-ONLY0: if.then3209: +// SIMD-ONLY0-NEXT: [[TMP2087:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2087]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3210]] +// SIMD-ONLY0: if.end3210: +// SIMD-ONLY0-NEXT: [[TMP2088:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2088]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2089:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2090:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3211:%.*]] = icmp eq i32 [[TMP2089]], [[TMP2090]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3211]], label [[IF_THEN3213:%.*]], label [[IF_END3214:%.*]] +// SIMD-ONLY0: if.then3213: +// SIMD-ONLY0-NEXT: [[TMP2091:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2091]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3214]] +// SIMD-ONLY0: if.end3214: +// SIMD-ONLY0-NEXT: [[TMP2092:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2092]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2093:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2094:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3215:%.*]] = icmp eq i32 [[TMP2093]], [[TMP2094]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3215]], label [[IF_THEN3217:%.*]], label [[IF_END3218:%.*]] +// SIMD-ONLY0: if.then3217: +// SIMD-ONLY0-NEXT: [[TMP2095:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2095]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3218]] +// SIMD-ONLY0: if.end3218: +// SIMD-ONLY0-NEXT: [[TMP2096:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2097:%.*]] = load i32, ptr [[IX]], 
align 4 +// SIMD-ONLY0-NEXT: [[CMP3219:%.*]] = icmp sgt i32 [[TMP2096]], [[TMP2097]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3219]], label [[IF_THEN3221:%.*]], label [[IF_END3222:%.*]] +// SIMD-ONLY0: if.then3221: +// SIMD-ONLY0-NEXT: [[TMP2098:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2098]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3222]] +// SIMD-ONLY0: if.end3222: +// SIMD-ONLY0-NEXT: [[TMP2099:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2099]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2100:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2101:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3223:%.*]] = icmp sgt i32 [[TMP2100]], [[TMP2101]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3223]], label [[IF_THEN3225:%.*]], label [[IF_END3226:%.*]] +// SIMD-ONLY0: if.then3225: +// SIMD-ONLY0-NEXT: [[TMP2102:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2102]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3226]] +// SIMD-ONLY0: if.end3226: +// SIMD-ONLY0-NEXT: [[TMP2103:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2103]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2104:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2105:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3227:%.*]] = icmp slt i32 [[TMP2104]], [[TMP2105]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3227]], label [[IF_THEN3229:%.*]], label [[IF_END3230:%.*]] +// SIMD-ONLY0: if.then3229: +// SIMD-ONLY0-NEXT: [[TMP2106:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2106]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3230]] +// SIMD-ONLY0: if.end3230: +// SIMD-ONLY0-NEXT: [[TMP2107:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2107]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2108:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP2109:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3231:%.*]] = icmp slt i32 [[TMP2108]], [[TMP2109]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3231]], label [[IF_THEN3233:%.*]], label [[IF_END3234:%.*]] +// SIMD-ONLY0: if.then3233: +// SIMD-ONLY0-NEXT: [[TMP2110:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2110]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3234]] +// SIMD-ONLY0: if.end3234: +// SIMD-ONLY0-NEXT: [[TMP2111:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2111]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2112:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2113:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3235:%.*]] = icmp eq i32 [[TMP2112]], [[TMP2113]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3235]], label [[IF_THEN3237:%.*]], label [[IF_END3238:%.*]] +// SIMD-ONLY0: if.then3237: +// SIMD-ONLY0-NEXT: [[TMP2114:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2114]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3238]] +// SIMD-ONLY0: if.end3238: +// SIMD-ONLY0-NEXT: [[TMP2115:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2115]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2116:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2117:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3239:%.*]] = icmp eq i32 [[TMP2116]], [[TMP2117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3239]], label [[IF_THEN3241:%.*]], label [[IF_END3242:%.*]] +// SIMD-ONLY0: if.then3241: +// SIMD-ONLY0-NEXT: [[TMP2118:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2118]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3242]] +// SIMD-ONLY0: if.end3242: +// SIMD-ONLY0-NEXT: [[TMP2119:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2119]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2120:%.*]] = load i32, ptr [[IX]], align 
4 +// SIMD-ONLY0-NEXT: [[TMP2121:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3243:%.*]] = icmp eq i32 [[TMP2120]], [[TMP2121]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3243]], label [[IF_THEN3245:%.*]], label [[IF_ELSE3246:%.*]] +// SIMD-ONLY0: if.then3245: +// SIMD-ONLY0-NEXT: [[TMP2122:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2122]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3247:%.*]] +// SIMD-ONLY0: if.else3246: +// SIMD-ONLY0-NEXT: [[TMP2123:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2123]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3247]] +// SIMD-ONLY0: if.end3247: +// SIMD-ONLY0-NEXT: [[TMP2124:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2125:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3248:%.*]] = icmp eq i32 [[TMP2124]], [[TMP2125]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3248]], label [[IF_THEN3250:%.*]], label [[IF_ELSE3251:%.*]] +// SIMD-ONLY0: if.then3250: +// SIMD-ONLY0-NEXT: [[TMP2126:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2126]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3252:%.*]] +// SIMD-ONLY0: if.else3251: +// SIMD-ONLY0-NEXT: [[TMP2127:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2127]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3252]] +// SIMD-ONLY0: if.end3252: +// SIMD-ONLY0-NEXT: [[TMP2128:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2129:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3253:%.*]] = icmp eq i32 [[TMP2128]], [[TMP2129]] +// SIMD-ONLY0-NEXT: [[CONV3254:%.*]] = zext i1 [[CMP3253]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3254]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2130:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3255:%.*]] = icmp ne i32 [[TMP2130]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3255]], label [[IF_THEN3256:%.*]], label 
[[IF_END3257:%.*]] +// SIMD-ONLY0: if.then3256: +// SIMD-ONLY0-NEXT: [[TMP2131:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2131]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3257]] +// SIMD-ONLY0: if.end3257: +// SIMD-ONLY0-NEXT: [[TMP2132:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2133:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3258:%.*]] = icmp eq i32 [[TMP2132]], [[TMP2133]] +// SIMD-ONLY0-NEXT: [[CONV3259:%.*]] = zext i1 [[CMP3258]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3259]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2134:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3260:%.*]] = icmp ne i32 [[TMP2134]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3260]], label [[IF_THEN3261:%.*]], label [[IF_END3262:%.*]] +// SIMD-ONLY0: if.then3261: +// SIMD-ONLY0-NEXT: [[TMP2135:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2135]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3262]] +// SIMD-ONLY0: if.end3262: +// SIMD-ONLY0-NEXT: [[TMP2136:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2137:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3263:%.*]] = icmp eq i32 [[TMP2136]], [[TMP2137]] +// SIMD-ONLY0-NEXT: [[CONV3264:%.*]] = zext i1 [[CMP3263]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3264]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2138:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3265:%.*]] = icmp ne i32 [[TMP2138]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3265]], label [[IF_THEN3266:%.*]], label [[IF_ELSE3267:%.*]] +// SIMD-ONLY0: if.then3266: +// SIMD-ONLY0-NEXT: [[TMP2139:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2139]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3268:%.*]] +// SIMD-ONLY0: if.else3267: +// SIMD-ONLY0-NEXT: [[TMP2140:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 
[[TMP2140]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3268]] +// SIMD-ONLY0: if.end3268: +// SIMD-ONLY0-NEXT: [[TMP2141:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2142:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3269:%.*]] = icmp eq i32 [[TMP2141]], [[TMP2142]] +// SIMD-ONLY0-NEXT: [[CONV3270:%.*]] = zext i1 [[CMP3269]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3270]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2143:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3271:%.*]] = icmp ne i32 [[TMP2143]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3271]], label [[IF_THEN3272:%.*]], label [[IF_ELSE3273:%.*]] +// SIMD-ONLY0: if.then3272: +// SIMD-ONLY0-NEXT: [[TMP2144:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2144]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3274:%.*]] +// SIMD-ONLY0: if.else3273: +// SIMD-ONLY0-NEXT: [[TMP2145:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2145]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3274]] +// SIMD-ONLY0: if.end3274: +// SIMD-ONLY0-NEXT: [[TMP2146:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2146]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2147:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2148:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3275:%.*]] = icmp sgt i32 [[TMP2147]], [[TMP2148]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3275]], label [[IF_THEN3277:%.*]], label [[IF_END3278:%.*]] +// SIMD-ONLY0: if.then3277: +// SIMD-ONLY0-NEXT: [[TMP2149:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2149]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3278]] +// SIMD-ONLY0: if.end3278: +// SIMD-ONLY0-NEXT: [[TMP2150:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2150]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2151:%.*]] = load i32, ptr 
[[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2152:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3279:%.*]] = icmp sgt i32 [[TMP2151]], [[TMP2152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3279]], label [[IF_THEN3281:%.*]], label [[IF_END3282:%.*]] +// SIMD-ONLY0: if.then3281: +// SIMD-ONLY0-NEXT: [[TMP2153:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2153]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3282]] +// SIMD-ONLY0: if.end3282: +// SIMD-ONLY0-NEXT: [[TMP2154:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2154]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2155:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2156:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3283:%.*]] = icmp slt i32 [[TMP2155]], [[TMP2156]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3283]], label [[IF_THEN3285:%.*]], label [[IF_END3286:%.*]] +// SIMD-ONLY0: if.then3285: +// SIMD-ONLY0-NEXT: [[TMP2157:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2157]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3286]] +// SIMD-ONLY0: if.end3286: +// SIMD-ONLY0-NEXT: [[TMP2158:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2158]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2159:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2160:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3287:%.*]] = icmp slt i32 [[TMP2159]], [[TMP2160]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3287]], label [[IF_THEN3289:%.*]], label [[IF_END3290:%.*]] +// SIMD-ONLY0: if.then3289: +// SIMD-ONLY0-NEXT: [[TMP2161:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2161]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3290]] +// SIMD-ONLY0: if.end3290: +// SIMD-ONLY0-NEXT: [[TMP2162:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2162]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP2163:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2164:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3291:%.*]] = icmp eq i32 [[TMP2163]], [[TMP2164]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3291]], label [[IF_THEN3293:%.*]], label [[IF_END3294:%.*]] +// SIMD-ONLY0: if.then3293: +// SIMD-ONLY0-NEXT: [[TMP2165:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2165]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3294]] +// SIMD-ONLY0: if.end3294: +// SIMD-ONLY0-NEXT: [[TMP2166:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2166]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2167:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2168:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3295:%.*]] = icmp eq i32 [[TMP2167]], [[TMP2168]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3295]], label [[IF_THEN3297:%.*]], label [[IF_END3298:%.*]] +// SIMD-ONLY0: if.then3297: +// SIMD-ONLY0-NEXT: [[TMP2169:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2169]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3298]] +// SIMD-ONLY0: if.end3298: +// SIMD-ONLY0-NEXT: [[TMP2170:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2171:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3299:%.*]] = icmp sgt i32 [[TMP2170]], [[TMP2171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3299]], label [[IF_THEN3301:%.*]], label [[IF_END3302:%.*]] +// SIMD-ONLY0: if.then3301: +// SIMD-ONLY0-NEXT: [[TMP2172:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2172]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3302]] +// SIMD-ONLY0: if.end3302: +// SIMD-ONLY0-NEXT: [[TMP2173:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2173]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2174:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2175:%.*]] = load i32, ptr [[IE]], 
align 4 +// SIMD-ONLY0-NEXT: [[CMP3303:%.*]] = icmp sgt i32 [[TMP2174]], [[TMP2175]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3303]], label [[IF_THEN3305:%.*]], label [[IF_END3306:%.*]] +// SIMD-ONLY0: if.then3305: +// SIMD-ONLY0-NEXT: [[TMP2176:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2176]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3306]] +// SIMD-ONLY0: if.end3306: +// SIMD-ONLY0-NEXT: [[TMP2177:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2177]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2178:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2179:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3307:%.*]] = icmp slt i32 [[TMP2178]], [[TMP2179]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3307]], label [[IF_THEN3309:%.*]], label [[IF_END3310:%.*]] +// SIMD-ONLY0: if.then3309: +// SIMD-ONLY0-NEXT: [[TMP2180:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2180]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3310]] +// SIMD-ONLY0: if.end3310: +// SIMD-ONLY0-NEXT: [[TMP2181:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2181]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2182:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2183:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3311:%.*]] = icmp slt i32 [[TMP2182]], [[TMP2183]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3311]], label [[IF_THEN3313:%.*]], label [[IF_END3314:%.*]] +// SIMD-ONLY0: if.then3313: +// SIMD-ONLY0-NEXT: [[TMP2184:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2184]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3314]] +// SIMD-ONLY0: if.end3314: +// SIMD-ONLY0-NEXT: [[TMP2185:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2185]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2186:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP2187:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3315:%.*]] = icmp eq i32 [[TMP2186]], [[TMP2187]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3315]], label [[IF_THEN3317:%.*]], label [[IF_END3318:%.*]] +// SIMD-ONLY0: if.then3317: +// SIMD-ONLY0-NEXT: [[TMP2188:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2188]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3318]] +// SIMD-ONLY0: if.end3318: +// SIMD-ONLY0-NEXT: [[TMP2189:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2189]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2190:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2191:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3319:%.*]] = icmp eq i32 [[TMP2190]], [[TMP2191]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3319]], label [[IF_THEN3321:%.*]], label [[IF_END3322:%.*]] +// SIMD-ONLY0: if.then3321: +// SIMD-ONLY0-NEXT: [[TMP2192:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2192]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3322]] +// SIMD-ONLY0: if.end3322: +// SIMD-ONLY0-NEXT: [[TMP2193:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2193]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2194:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2195:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3323:%.*]] = icmp eq i32 [[TMP2194]], [[TMP2195]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3323]], label [[IF_THEN3325:%.*]], label [[IF_ELSE3326:%.*]] +// SIMD-ONLY0: if.then3325: +// SIMD-ONLY0-NEXT: [[TMP2196:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2196]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3327:%.*]] +// SIMD-ONLY0: if.else3326: +// SIMD-ONLY0-NEXT: [[TMP2197:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2197]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3327]] +// SIMD-ONLY0: 
if.end3327: +// SIMD-ONLY0-NEXT: [[TMP2198:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2199:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3328:%.*]] = icmp eq i32 [[TMP2198]], [[TMP2199]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3328]], label [[IF_THEN3330:%.*]], label [[IF_ELSE3331:%.*]] +// SIMD-ONLY0: if.then3330: +// SIMD-ONLY0-NEXT: [[TMP2200:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2200]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3332:%.*]] +// SIMD-ONLY0: if.else3331: +// SIMD-ONLY0-NEXT: [[TMP2201:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2201]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3332]] +// SIMD-ONLY0: if.end3332: +// SIMD-ONLY0-NEXT: [[TMP2202:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2203:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3333:%.*]] = icmp eq i32 [[TMP2202]], [[TMP2203]] +// SIMD-ONLY0-NEXT: [[CONV3334:%.*]] = zext i1 [[CMP3333]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3334]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2204:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3335:%.*]] = icmp ne i32 [[TMP2204]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3335]], label [[IF_THEN3336:%.*]], label [[IF_END3337:%.*]] +// SIMD-ONLY0: if.then3336: +// SIMD-ONLY0-NEXT: [[TMP2205:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2205]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3337]] +// SIMD-ONLY0: if.end3337: +// SIMD-ONLY0-NEXT: [[TMP2206:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2207:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3338:%.*]] = icmp eq i32 [[TMP2206]], [[TMP2207]] +// SIMD-ONLY0-NEXT: [[CONV3339:%.*]] = zext i1 [[CMP3338]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3339]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2208:%.*]] = load i32, ptr [[IR]], align 4 +// 
SIMD-ONLY0-NEXT: [[TOBOOL3340:%.*]] = icmp ne i32 [[TMP2208]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3340]], label [[IF_THEN3341:%.*]], label [[IF_END3342:%.*]] +// SIMD-ONLY0: if.then3341: +// SIMD-ONLY0-NEXT: [[TMP2209:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2209]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3342]] +// SIMD-ONLY0: if.end3342: +// SIMD-ONLY0-NEXT: [[TMP2210:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2211:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3343:%.*]] = icmp eq i32 [[TMP2210]], [[TMP2211]] +// SIMD-ONLY0-NEXT: [[CONV3344:%.*]] = zext i1 [[CMP3343]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3344]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2212:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3345:%.*]] = icmp ne i32 [[TMP2212]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3345]], label [[IF_THEN3346:%.*]], label [[IF_ELSE3347:%.*]] +// SIMD-ONLY0: if.then3346: +// SIMD-ONLY0-NEXT: [[TMP2213:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2213]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3348:%.*]] +// SIMD-ONLY0: if.else3347: +// SIMD-ONLY0-NEXT: [[TMP2214:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2214]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3348]] +// SIMD-ONLY0: if.end3348: +// SIMD-ONLY0-NEXT: [[TMP2215:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2216:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3349:%.*]] = icmp eq i32 [[TMP2215]], [[TMP2216]] +// SIMD-ONLY0-NEXT: [[CONV3350:%.*]] = zext i1 [[CMP3349]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3350]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2217:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3351:%.*]] = icmp ne i32 [[TMP2217]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3351]], label [[IF_THEN3352:%.*]], label 
[[IF_ELSE3353:%.*]] +// SIMD-ONLY0: if.then3352: +// SIMD-ONLY0-NEXT: [[TMP2218:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2218]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3354:%.*]] +// SIMD-ONLY0: if.else3353: +// SIMD-ONLY0-NEXT: [[TMP2219:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2219]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3354]] +// SIMD-ONLY0: if.end3354: +// SIMD-ONLY0-NEXT: [[TMP2220:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2220]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2221:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2222:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3355:%.*]] = icmp ugt i32 [[TMP2221]], [[TMP2222]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3355]], label [[IF_THEN3357:%.*]], label [[IF_END3358:%.*]] +// SIMD-ONLY0: if.then3357: +// SIMD-ONLY0-NEXT: [[TMP2223:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2223]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3358]] +// SIMD-ONLY0: if.end3358: +// SIMD-ONLY0-NEXT: [[TMP2224:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2224]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2225:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2226:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3359:%.*]] = icmp ugt i32 [[TMP2225]], [[TMP2226]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3359]], label [[IF_THEN3361:%.*]], label [[IF_END3362:%.*]] +// SIMD-ONLY0: if.then3361: +// SIMD-ONLY0-NEXT: [[TMP2227:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2227]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3362]] +// SIMD-ONLY0: if.end3362: +// SIMD-ONLY0-NEXT: [[TMP2228:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2228]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2229:%.*]] 
= load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2230:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3363:%.*]] = icmp ult i32 [[TMP2229]], [[TMP2230]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3363]], label [[IF_THEN3365:%.*]], label [[IF_END3366:%.*]] +// SIMD-ONLY0: if.then3365: +// SIMD-ONLY0-NEXT: [[TMP2231:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2231]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3366]] +// SIMD-ONLY0: if.end3366: +// SIMD-ONLY0-NEXT: [[TMP2232:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2232]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2233:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2234:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3367:%.*]] = icmp ult i32 [[TMP2233]], [[TMP2234]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3367]], label [[IF_THEN3369:%.*]], label [[IF_END3370:%.*]] +// SIMD-ONLY0: if.then3369: +// SIMD-ONLY0-NEXT: [[TMP2235:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2235]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3370]] +// SIMD-ONLY0: if.end3370: +// SIMD-ONLY0-NEXT: [[TMP2236:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2236]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2237:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2238:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3371:%.*]] = icmp eq i32 [[TMP2237]], [[TMP2238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3371]], label [[IF_THEN3373:%.*]], label [[IF_END3374:%.*]] +// SIMD-ONLY0: if.then3373: +// SIMD-ONLY0-NEXT: [[TMP2239:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2239]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3374]] +// SIMD-ONLY0: if.end3374: +// SIMD-ONLY0-NEXT: [[TMP2240:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2240]], ptr [[UIV]], 
align 4 +// SIMD-ONLY0-NEXT: [[TMP2241:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2242:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3375:%.*]] = icmp eq i32 [[TMP2241]], [[TMP2242]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3375]], label [[IF_THEN3377:%.*]], label [[IF_END3378:%.*]] +// SIMD-ONLY0: if.then3377: +// SIMD-ONLY0-NEXT: [[TMP2243:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2243]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3378]] +// SIMD-ONLY0: if.end3378: +// SIMD-ONLY0-NEXT: [[TMP2244:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2245:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3379:%.*]] = icmp ugt i32 [[TMP2244]], [[TMP2245]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3379]], label [[IF_THEN3381:%.*]], label [[IF_END3382:%.*]] +// SIMD-ONLY0: if.then3381: +// SIMD-ONLY0-NEXT: [[TMP2246:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2246]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3382]] +// SIMD-ONLY0: if.end3382: +// SIMD-ONLY0-NEXT: [[TMP2247:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2247]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2248:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2249:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3383:%.*]] = icmp ugt i32 [[TMP2248]], [[TMP2249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3383]], label [[IF_THEN3385:%.*]], label [[IF_END3386:%.*]] +// SIMD-ONLY0: if.then3385: +// SIMD-ONLY0-NEXT: [[TMP2250:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2250]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3386]] +// SIMD-ONLY0: if.end3386: +// SIMD-ONLY0-NEXT: [[TMP2251:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2251]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2252:%.*]] = load i32, ptr [[UIE]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP2253:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3387:%.*]] = icmp ult i32 [[TMP2252]], [[TMP2253]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3387]], label [[IF_THEN3389:%.*]], label [[IF_END3390:%.*]] +// SIMD-ONLY0: if.then3389: +// SIMD-ONLY0-NEXT: [[TMP2254:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2254]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3390]] +// SIMD-ONLY0: if.end3390: +// SIMD-ONLY0-NEXT: [[TMP2255:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2255]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2256:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2257:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3391:%.*]] = icmp ult i32 [[TMP2256]], [[TMP2257]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3391]], label [[IF_THEN3393:%.*]], label [[IF_END3394:%.*]] +// SIMD-ONLY0: if.then3393: +// SIMD-ONLY0-NEXT: [[TMP2258:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2258]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3394]] +// SIMD-ONLY0: if.end3394: +// SIMD-ONLY0-NEXT: [[TMP2259:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2259]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2260:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2261:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3395:%.*]] = icmp eq i32 [[TMP2260]], [[TMP2261]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3395]], label [[IF_THEN3397:%.*]], label [[IF_END3398:%.*]] +// SIMD-ONLY0: if.then3397: +// SIMD-ONLY0-NEXT: [[TMP2262:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2262]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3398]] +// SIMD-ONLY0: if.end3398: +// SIMD-ONLY0-NEXT: [[TMP2263:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2263]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP2264:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2265:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3399:%.*]] = icmp eq i32 [[TMP2264]], [[TMP2265]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3399]], label [[IF_THEN3401:%.*]], label [[IF_END3402:%.*]] +// SIMD-ONLY0: if.then3401: +// SIMD-ONLY0-NEXT: [[TMP2266:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2266]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3402]] +// SIMD-ONLY0: if.end3402: +// SIMD-ONLY0-NEXT: [[TMP2267:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2267]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2268:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2269:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3403:%.*]] = icmp eq i32 [[TMP2268]], [[TMP2269]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3403]], label [[IF_THEN3405:%.*]], label [[IF_ELSE3406:%.*]] +// SIMD-ONLY0: if.then3405: +// SIMD-ONLY0-NEXT: [[TMP2270:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2270]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3407:%.*]] +// SIMD-ONLY0: if.else3406: +// SIMD-ONLY0-NEXT: [[TMP2271:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2271]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3407]] +// SIMD-ONLY0: if.end3407: +// SIMD-ONLY0-NEXT: [[TMP2272:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2273:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3408:%.*]] = icmp eq i32 [[TMP2272]], [[TMP2273]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3408]], label [[IF_THEN3410:%.*]], label [[IF_ELSE3411:%.*]] +// SIMD-ONLY0: if.then3410: +// SIMD-ONLY0-NEXT: [[TMP2274:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2274]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3412:%.*]] +// SIMD-ONLY0: if.else3411: +// SIMD-ONLY0-NEXT: 
[[TMP2275:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2275]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3412]] +// SIMD-ONLY0: if.end3412: +// SIMD-ONLY0-NEXT: [[TMP2276:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2277:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3413:%.*]] = icmp eq i32 [[TMP2276]], [[TMP2277]] +// SIMD-ONLY0-NEXT: [[CONV3414:%.*]] = zext i1 [[CMP3413]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3414]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2278:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3415:%.*]] = icmp ne i32 [[TMP2278]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3415]], label [[IF_THEN3416:%.*]], label [[IF_END3417:%.*]] +// SIMD-ONLY0: if.then3416: +// SIMD-ONLY0-NEXT: [[TMP2279:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2279]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3417]] +// SIMD-ONLY0: if.end3417: +// SIMD-ONLY0-NEXT: [[TMP2280:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2281:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3418:%.*]] = icmp eq i32 [[TMP2280]], [[TMP2281]] +// SIMD-ONLY0-NEXT: [[CONV3419:%.*]] = zext i1 [[CMP3418]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3419]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2282:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3420:%.*]] = icmp ne i32 [[TMP2282]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3420]], label [[IF_THEN3421:%.*]], label [[IF_END3422:%.*]] +// SIMD-ONLY0: if.then3421: +// SIMD-ONLY0-NEXT: [[TMP2283:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2283]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3422]] +// SIMD-ONLY0: if.end3422: +// SIMD-ONLY0-NEXT: [[TMP2284:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2285:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP3423:%.*]] = icmp eq i32 [[TMP2284]], [[TMP2285]] +// SIMD-ONLY0-NEXT: [[CONV3424:%.*]] = zext i1 [[CMP3423]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3424]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2286:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3425:%.*]] = icmp ne i32 [[TMP2286]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3425]], label [[IF_THEN3426:%.*]], label [[IF_ELSE3427:%.*]] +// SIMD-ONLY0: if.then3426: +// SIMD-ONLY0-NEXT: [[TMP2287:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2287]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3428:%.*]] +// SIMD-ONLY0: if.else3427: +// SIMD-ONLY0-NEXT: [[TMP2288:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2288]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3428]] +// SIMD-ONLY0: if.end3428: +// SIMD-ONLY0-NEXT: [[TMP2289:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2290:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3429:%.*]] = icmp eq i32 [[TMP2289]], [[TMP2290]] +// SIMD-ONLY0-NEXT: [[CONV3430:%.*]] = zext i1 [[CMP3429]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3430]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2291:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3431:%.*]] = icmp ne i32 [[TMP2291]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3431]], label [[IF_THEN3432:%.*]], label [[IF_ELSE3433:%.*]] +// SIMD-ONLY0: if.then3432: +// SIMD-ONLY0-NEXT: [[TMP2292:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2292]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3434:%.*]] +// SIMD-ONLY0: if.else3433: +// SIMD-ONLY0-NEXT: [[TMP2293:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2293]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3434]] +// SIMD-ONLY0: if.end3434: +// SIMD-ONLY0-NEXT: [[TMP2294:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: 
store i32 [[TMP2294]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2295:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2296:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3435:%.*]] = icmp ugt i32 [[TMP2295]], [[TMP2296]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3435]], label [[IF_THEN3437:%.*]], label [[IF_END3438:%.*]] +// SIMD-ONLY0: if.then3437: +// SIMD-ONLY0-NEXT: [[TMP2297:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2297]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3438]] +// SIMD-ONLY0: if.end3438: +// SIMD-ONLY0-NEXT: [[TMP2298:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2298]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2299:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2300:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3439:%.*]] = icmp ugt i32 [[TMP2299]], [[TMP2300]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3439]], label [[IF_THEN3441:%.*]], label [[IF_END3442:%.*]] +// SIMD-ONLY0: if.then3441: +// SIMD-ONLY0-NEXT: [[TMP2301:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2301]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3442]] +// SIMD-ONLY0: if.end3442: +// SIMD-ONLY0-NEXT: [[TMP2302:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2302]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2303:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2304:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3443:%.*]] = icmp ult i32 [[TMP2303]], [[TMP2304]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3443]], label [[IF_THEN3445:%.*]], label [[IF_END3446:%.*]] +// SIMD-ONLY0: if.then3445: +// SIMD-ONLY0-NEXT: [[TMP2305:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2305]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3446]] +// SIMD-ONLY0: if.end3446: +// SIMD-ONLY0-NEXT: [[TMP2306:%.*]] = load 
i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2306]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2307:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2308:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3447:%.*]] = icmp ult i32 [[TMP2307]], [[TMP2308]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3447]], label [[IF_THEN3449:%.*]], label [[IF_END3450:%.*]] +// SIMD-ONLY0: if.then3449: +// SIMD-ONLY0-NEXT: [[TMP2309:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2309]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3450]] +// SIMD-ONLY0: if.end3450: +// SIMD-ONLY0-NEXT: [[TMP2310:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2310]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2311:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2312:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3451:%.*]] = icmp eq i32 [[TMP2311]], [[TMP2312]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3451]], label [[IF_THEN3453:%.*]], label [[IF_END3454:%.*]] +// SIMD-ONLY0: if.then3453: +// SIMD-ONLY0-NEXT: [[TMP2313:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2313]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3454]] +// SIMD-ONLY0: if.end3454: +// SIMD-ONLY0-NEXT: [[TMP2314:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2314]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2315:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2316:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3455:%.*]] = icmp eq i32 [[TMP2315]], [[TMP2316]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3455]], label [[IF_THEN3457:%.*]], label [[IF_END3458:%.*]] +// SIMD-ONLY0: if.then3457: +// SIMD-ONLY0-NEXT: [[TMP2317:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2317]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3458]] +// SIMD-ONLY0: if.end3458: 
+// SIMD-ONLY0-NEXT: [[TMP2318:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2319:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3459:%.*]] = icmp ugt i32 [[TMP2318]], [[TMP2319]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3459]], label [[IF_THEN3461:%.*]], label [[IF_END3462:%.*]] +// SIMD-ONLY0: if.then3461: +// SIMD-ONLY0-NEXT: [[TMP2320:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2320]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3462]] +// SIMD-ONLY0: if.end3462: +// SIMD-ONLY0-NEXT: [[TMP2321:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2321]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2322:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2323:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3463:%.*]] = icmp ugt i32 [[TMP2322]], [[TMP2323]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3463]], label [[IF_THEN3465:%.*]], label [[IF_END3466:%.*]] +// SIMD-ONLY0: if.then3465: +// SIMD-ONLY0-NEXT: [[TMP2324:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2324]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3466]] +// SIMD-ONLY0: if.end3466: +// SIMD-ONLY0-NEXT: [[TMP2325:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2325]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2326:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2327:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3467:%.*]] = icmp ult i32 [[TMP2326]], [[TMP2327]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3467]], label [[IF_THEN3469:%.*]], label [[IF_END3470:%.*]] +// SIMD-ONLY0: if.then3469: +// SIMD-ONLY0-NEXT: [[TMP2328:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2328]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3470]] +// SIMD-ONLY0: if.end3470: +// SIMD-ONLY0-NEXT: [[TMP2329:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: 
store i32 [[TMP2329]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2330:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2331:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3471:%.*]] = icmp ult i32 [[TMP2330]], [[TMP2331]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3471]], label [[IF_THEN3473:%.*]], label [[IF_END3474:%.*]] +// SIMD-ONLY0: if.then3473: +// SIMD-ONLY0-NEXT: [[TMP2332:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2332]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3474]] +// SIMD-ONLY0: if.end3474: +// SIMD-ONLY0-NEXT: [[TMP2333:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2333]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2334:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2335:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3475:%.*]] = icmp eq i32 [[TMP2334]], [[TMP2335]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3475]], label [[IF_THEN3477:%.*]], label [[IF_END3478:%.*]] +// SIMD-ONLY0: if.then3477: +// SIMD-ONLY0-NEXT: [[TMP2336:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2336]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3478]] +// SIMD-ONLY0: if.end3478: +// SIMD-ONLY0-NEXT: [[TMP2337:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2337]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2338:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2339:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3479:%.*]] = icmp eq i32 [[TMP2338]], [[TMP2339]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3479]], label [[IF_THEN3481:%.*]], label [[IF_END3482:%.*]] +// SIMD-ONLY0: if.then3481: +// SIMD-ONLY0-NEXT: [[TMP2340:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2340]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3482]] +// SIMD-ONLY0: if.end3482: +// SIMD-ONLY0-NEXT: [[TMP2341:%.*]] = load i32, 
ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2341]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2342:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2343:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3483:%.*]] = icmp eq i32 [[TMP2342]], [[TMP2343]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3483]], label [[IF_THEN3485:%.*]], label [[IF_ELSE3486:%.*]] +// SIMD-ONLY0: if.then3485: +// SIMD-ONLY0-NEXT: [[TMP2344:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2344]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3487:%.*]] +// SIMD-ONLY0: if.else3486: +// SIMD-ONLY0-NEXT: [[TMP2345:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2345]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3487]] +// SIMD-ONLY0: if.end3487: +// SIMD-ONLY0-NEXT: [[TMP2346:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2347:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3488:%.*]] = icmp eq i32 [[TMP2346]], [[TMP2347]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3488]], label [[IF_THEN3490:%.*]], label [[IF_ELSE3491:%.*]] +// SIMD-ONLY0: if.then3490: +// SIMD-ONLY0-NEXT: [[TMP2348:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2348]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3492:%.*]] +// SIMD-ONLY0: if.else3491: +// SIMD-ONLY0-NEXT: [[TMP2349:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2349]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3492]] +// SIMD-ONLY0: if.end3492: +// SIMD-ONLY0-NEXT: [[TMP2350:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2351:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3493:%.*]] = icmp eq i32 [[TMP2350]], [[TMP2351]] +// SIMD-ONLY0-NEXT: [[CONV3494:%.*]] = zext i1 [[CMP3493]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3494]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2352:%.*]] = load 
i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3495:%.*]] = icmp ne i32 [[TMP2352]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3495]], label [[IF_THEN3496:%.*]], label [[IF_END3497:%.*]] +// SIMD-ONLY0: if.then3496: +// SIMD-ONLY0-NEXT: [[TMP2353:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2353]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3497]] +// SIMD-ONLY0: if.end3497: +// SIMD-ONLY0-NEXT: [[TMP2354:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2355:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3498:%.*]] = icmp eq i32 [[TMP2354]], [[TMP2355]] +// SIMD-ONLY0-NEXT: [[CONV3499:%.*]] = zext i1 [[CMP3498]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3499]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2356:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3500:%.*]] = icmp ne i32 [[TMP2356]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3500]], label [[IF_THEN3501:%.*]], label [[IF_END3502:%.*]] +// SIMD-ONLY0: if.then3501: +// SIMD-ONLY0-NEXT: [[TMP2357:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2357]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3502]] +// SIMD-ONLY0: if.end3502: +// SIMD-ONLY0-NEXT: [[TMP2358:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2359:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3503:%.*]] = icmp eq i32 [[TMP2358]], [[TMP2359]] +// SIMD-ONLY0-NEXT: [[CONV3504:%.*]] = zext i1 [[CMP3503]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3504]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2360:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3505:%.*]] = icmp ne i32 [[TMP2360]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3505]], label [[IF_THEN3506:%.*]], label [[IF_ELSE3507:%.*]] +// SIMD-ONLY0: if.then3506: +// SIMD-ONLY0-NEXT: [[TMP2361:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2361]], ptr [[UIX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END3508:%.*]] +// SIMD-ONLY0: if.else3507: +// SIMD-ONLY0-NEXT: [[TMP2362:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2362]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3508]] +// SIMD-ONLY0: if.end3508: +// SIMD-ONLY0-NEXT: [[TMP2363:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2364:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3509:%.*]] = icmp eq i32 [[TMP2363]], [[TMP2364]] +// SIMD-ONLY0-NEXT: [[CONV3510:%.*]] = zext i1 [[CMP3509]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3510]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2365:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3511:%.*]] = icmp ne i32 [[TMP2365]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3511]], label [[IF_THEN3512:%.*]], label [[IF_ELSE3513:%.*]] +// SIMD-ONLY0: if.then3512: +// SIMD-ONLY0-NEXT: [[TMP2366:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2366]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3514:%.*]] +// SIMD-ONLY0: if.else3513: +// SIMD-ONLY0-NEXT: [[TMP2367:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2367]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3514]] +// SIMD-ONLY0: if.end3514: +// SIMD-ONLY0-NEXT: [[TMP2368:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2368]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2369:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2370:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3515:%.*]] = icmp ugt i32 [[TMP2369]], [[TMP2370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3515]], label [[IF_THEN3517:%.*]], label [[IF_END3518:%.*]] +// SIMD-ONLY0: if.then3517: +// SIMD-ONLY0-NEXT: [[TMP2371:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2371]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3518]] +// SIMD-ONLY0: if.end3518: 
+// SIMD-ONLY0-NEXT: [[TMP2372:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2372]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2373:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2374:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3519:%.*]] = icmp ugt i32 [[TMP2373]], [[TMP2374]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3519]], label [[IF_THEN3521:%.*]], label [[IF_END3522:%.*]] +// SIMD-ONLY0: if.then3521: +// SIMD-ONLY0-NEXT: [[TMP2375:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2375]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3522]] +// SIMD-ONLY0: if.end3522: +// SIMD-ONLY0-NEXT: [[TMP2376:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2376]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2377:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2378:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3523:%.*]] = icmp ult i32 [[TMP2377]], [[TMP2378]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3523]], label [[IF_THEN3525:%.*]], label [[IF_END3526:%.*]] +// SIMD-ONLY0: if.then3525: +// SIMD-ONLY0-NEXT: [[TMP2379:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2379]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3526]] +// SIMD-ONLY0: if.end3526: +// SIMD-ONLY0-NEXT: [[TMP2380:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2380]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2381:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2382:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3527:%.*]] = icmp ult i32 [[TMP2381]], [[TMP2382]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3527]], label [[IF_THEN3529:%.*]], label [[IF_END3530:%.*]] +// SIMD-ONLY0: if.then3529: +// SIMD-ONLY0-NEXT: [[TMP2383:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2383]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[IF_END3530]] +// SIMD-ONLY0: if.end3530: +// SIMD-ONLY0-NEXT: [[TMP2384:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2384]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2385:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2386:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3531:%.*]] = icmp eq i32 [[TMP2385]], [[TMP2386]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3531]], label [[IF_THEN3533:%.*]], label [[IF_END3534:%.*]] +// SIMD-ONLY0: if.then3533: +// SIMD-ONLY0-NEXT: [[TMP2387:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2387]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3534]] +// SIMD-ONLY0: if.end3534: +// SIMD-ONLY0-NEXT: [[TMP2388:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2388]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2389:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2390:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3535:%.*]] = icmp eq i32 [[TMP2389]], [[TMP2390]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3535]], label [[IF_THEN3537:%.*]], label [[IF_END3538:%.*]] +// SIMD-ONLY0: if.then3537: +// SIMD-ONLY0-NEXT: [[TMP2391:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2391]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3538]] +// SIMD-ONLY0: if.end3538: +// SIMD-ONLY0-NEXT: [[TMP2392:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2393:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3539:%.*]] = icmp ugt i32 [[TMP2392]], [[TMP2393]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3539]], label [[IF_THEN3541:%.*]], label [[IF_END3542:%.*]] +// SIMD-ONLY0: if.then3541: +// SIMD-ONLY0-NEXT: [[TMP2394:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2394]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3542]] +// SIMD-ONLY0: if.end3542: +// SIMD-ONLY0-NEXT: [[TMP2395:%.*]] = load i32, 
ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2395]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2396:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2397:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3543:%.*]] = icmp ugt i32 [[TMP2396]], [[TMP2397]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3543]], label [[IF_THEN3545:%.*]], label [[IF_END3546:%.*]] +// SIMD-ONLY0: if.then3545: +// SIMD-ONLY0-NEXT: [[TMP2398:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2398]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3546]] +// SIMD-ONLY0: if.end3546: +// SIMD-ONLY0-NEXT: [[TMP2399:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2399]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2400:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2401:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3547:%.*]] = icmp ult i32 [[TMP2400]], [[TMP2401]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3547]], label [[IF_THEN3549:%.*]], label [[IF_END3550:%.*]] +// SIMD-ONLY0: if.then3549: +// SIMD-ONLY0-NEXT: [[TMP2402:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2402]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3550]] +// SIMD-ONLY0: if.end3550: +// SIMD-ONLY0-NEXT: [[TMP2403:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2403]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2404:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2405:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3551:%.*]] = icmp ult i32 [[TMP2404]], [[TMP2405]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3551]], label [[IF_THEN3553:%.*]], label [[IF_END3554:%.*]] +// SIMD-ONLY0: if.then3553: +// SIMD-ONLY0-NEXT: [[TMP2406:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2406]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3554]] +// SIMD-ONLY0: if.end3554: +// 
SIMD-ONLY0-NEXT: [[TMP2407:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2407]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2408:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2409:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3555:%.*]] = icmp eq i32 [[TMP2408]], [[TMP2409]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3555]], label [[IF_THEN3557:%.*]], label [[IF_END3558:%.*]] +// SIMD-ONLY0: if.then3557: +// SIMD-ONLY0-NEXT: [[TMP2410:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2410]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3558]] +// SIMD-ONLY0: if.end3558: +// SIMD-ONLY0-NEXT: [[TMP2411:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2411]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2412:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2413:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3559:%.*]] = icmp eq i32 [[TMP2412]], [[TMP2413]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3559]], label [[IF_THEN3561:%.*]], label [[IF_END3562:%.*]] +// SIMD-ONLY0: if.then3561: +// SIMD-ONLY0-NEXT: [[TMP2414:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2414]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3562]] +// SIMD-ONLY0: if.end3562: +// SIMD-ONLY0-NEXT: [[TMP2415:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2415]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2416:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2417:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3563:%.*]] = icmp eq i32 [[TMP2416]], [[TMP2417]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3563]], label [[IF_THEN3565:%.*]], label [[IF_ELSE3566:%.*]] +// SIMD-ONLY0: if.then3565: +// SIMD-ONLY0-NEXT: [[TMP2418:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2418]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[IF_END3567:%.*]] +// SIMD-ONLY0: if.else3566: +// SIMD-ONLY0-NEXT: [[TMP2419:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2419]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3567]] +// SIMD-ONLY0: if.end3567: +// SIMD-ONLY0-NEXT: [[TMP2420:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2421:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3568:%.*]] = icmp eq i32 [[TMP2420]], [[TMP2421]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3568]], label [[IF_THEN3570:%.*]], label [[IF_ELSE3571:%.*]] +// SIMD-ONLY0: if.then3570: +// SIMD-ONLY0-NEXT: [[TMP2422:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2422]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3572:%.*]] +// SIMD-ONLY0: if.else3571: +// SIMD-ONLY0-NEXT: [[TMP2423:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2423]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3572]] +// SIMD-ONLY0: if.end3572: +// SIMD-ONLY0-NEXT: [[TMP2424:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2425:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3573:%.*]] = icmp eq i32 [[TMP2424]], [[TMP2425]] +// SIMD-ONLY0-NEXT: [[CONV3574:%.*]] = zext i1 [[CMP3573]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3574]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2426:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3575:%.*]] = icmp ne i32 [[TMP2426]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3575]], label [[IF_THEN3576:%.*]], label [[IF_END3577:%.*]] +// SIMD-ONLY0: if.then3576: +// SIMD-ONLY0-NEXT: [[TMP2427:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2427]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3577]] +// SIMD-ONLY0: if.end3577: +// SIMD-ONLY0-NEXT: [[TMP2428:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2429:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP3578:%.*]] = icmp eq i32 [[TMP2428]], [[TMP2429]] +// SIMD-ONLY0-NEXT: [[CONV3579:%.*]] = zext i1 [[CMP3578]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3579]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2430:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3580:%.*]] = icmp ne i32 [[TMP2430]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3580]], label [[IF_THEN3581:%.*]], label [[IF_END3582:%.*]] +// SIMD-ONLY0: if.then3581: +// SIMD-ONLY0-NEXT: [[TMP2431:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2431]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3582]] +// SIMD-ONLY0: if.end3582: +// SIMD-ONLY0-NEXT: [[TMP2432:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2433:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3583:%.*]] = icmp eq i32 [[TMP2432]], [[TMP2433]] +// SIMD-ONLY0-NEXT: [[CONV3584:%.*]] = zext i1 [[CMP3583]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3584]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2434:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3585:%.*]] = icmp ne i32 [[TMP2434]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3585]], label [[IF_THEN3586:%.*]], label [[IF_ELSE3587:%.*]] +// SIMD-ONLY0: if.then3586: +// SIMD-ONLY0-NEXT: [[TMP2435:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2435]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3588:%.*]] +// SIMD-ONLY0: if.else3587: +// SIMD-ONLY0-NEXT: [[TMP2436:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2436]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3588]] +// SIMD-ONLY0: if.end3588: +// SIMD-ONLY0-NEXT: [[TMP2437:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2438:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3589:%.*]] = icmp eq i32 [[TMP2437]], [[TMP2438]] +// SIMD-ONLY0-NEXT: [[CONV3590:%.*]] = zext i1 [[CMP3589]] to i32 +// SIMD-ONLY0-NEXT: 
store i32 [[CONV3590]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2439:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3591:%.*]] = icmp ne i32 [[TMP2439]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3591]], label [[IF_THEN3592:%.*]], label [[IF_ELSE3593:%.*]] +// SIMD-ONLY0: if.then3592: +// SIMD-ONLY0-NEXT: [[TMP2440:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2440]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3594:%.*]] +// SIMD-ONLY0: if.else3593: +// SIMD-ONLY0-NEXT: [[TMP2441:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2441]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3594]] +// SIMD-ONLY0: if.end3594: +// SIMD-ONLY0-NEXT: [[TMP2442:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2442]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2443:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2444:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3595:%.*]] = icmp ugt i32 [[TMP2443]], [[TMP2444]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3595]], label [[IF_THEN3597:%.*]], label [[IF_END3598:%.*]] +// SIMD-ONLY0: if.then3597: +// SIMD-ONLY0-NEXT: [[TMP2445:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2445]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3598]] +// SIMD-ONLY0: if.end3598: +// SIMD-ONLY0-NEXT: [[TMP2446:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2446]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2447:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2448:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3599:%.*]] = icmp ugt i32 [[TMP2447]], [[TMP2448]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3599]], label [[IF_THEN3601:%.*]], label [[IF_END3602:%.*]] +// SIMD-ONLY0: if.then3601: +// SIMD-ONLY0-NEXT: [[TMP2449:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2449]], 
ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3602]] +// SIMD-ONLY0: if.end3602: +// SIMD-ONLY0-NEXT: [[TMP2450:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2450]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2451:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2452:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3603:%.*]] = icmp ult i32 [[TMP2451]], [[TMP2452]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3603]], label [[IF_THEN3605:%.*]], label [[IF_END3606:%.*]] +// SIMD-ONLY0: if.then3605: +// SIMD-ONLY0-NEXT: [[TMP2453:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2453]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3606]] +// SIMD-ONLY0: if.end3606: +// SIMD-ONLY0-NEXT: [[TMP2454:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2454]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2455:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2456:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3607:%.*]] = icmp ult i32 [[TMP2455]], [[TMP2456]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3607]], label [[IF_THEN3609:%.*]], label [[IF_END3610:%.*]] +// SIMD-ONLY0: if.then3609: +// SIMD-ONLY0-NEXT: [[TMP2457:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2457]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3610]] +// SIMD-ONLY0: if.end3610: +// SIMD-ONLY0-NEXT: [[TMP2458:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2458]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2459:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2460:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3611:%.*]] = icmp eq i32 [[TMP2459]], [[TMP2460]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3611]], label [[IF_THEN3613:%.*]], label [[IF_END3614:%.*]] +// SIMD-ONLY0: if.then3613: +// SIMD-ONLY0-NEXT: [[TMP2461:%.*]] = load i32, ptr [[UID]], align 4 
+// SIMD-ONLY0-NEXT: store i32 [[TMP2461]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3614]] +// SIMD-ONLY0: if.end3614: +// SIMD-ONLY0-NEXT: [[TMP2462:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2462]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2463:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2464:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3615:%.*]] = icmp eq i32 [[TMP2463]], [[TMP2464]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3615]], label [[IF_THEN3617:%.*]], label [[IF_END3618:%.*]] +// SIMD-ONLY0: if.then3617: +// SIMD-ONLY0-NEXT: [[TMP2465:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2465]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3618]] +// SIMD-ONLY0: if.end3618: +// SIMD-ONLY0-NEXT: [[TMP2466:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2467:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3619:%.*]] = icmp ugt i32 [[TMP2466]], [[TMP2467]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3619]], label [[IF_THEN3621:%.*]], label [[IF_END3622:%.*]] +// SIMD-ONLY0: if.then3621: +// SIMD-ONLY0-NEXT: [[TMP2468:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2468]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3622]] +// SIMD-ONLY0: if.end3622: +// SIMD-ONLY0-NEXT: [[TMP2469:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2469]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2470:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2471:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3623:%.*]] = icmp ugt i32 [[TMP2470]], [[TMP2471]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3623]], label [[IF_THEN3625:%.*]], label [[IF_END3626:%.*]] +// SIMD-ONLY0: if.then3625: +// SIMD-ONLY0-NEXT: [[TMP2472:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2472]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br 
label [[IF_END3626]] +// SIMD-ONLY0: if.end3626: +// SIMD-ONLY0-NEXT: [[TMP2473:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2473]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2474:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2475:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3627:%.*]] = icmp ult i32 [[TMP2474]], [[TMP2475]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3627]], label [[IF_THEN3629:%.*]], label [[IF_END3630:%.*]] +// SIMD-ONLY0: if.then3629: +// SIMD-ONLY0-NEXT: [[TMP2476:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2476]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3630]] +// SIMD-ONLY0: if.end3630: +// SIMD-ONLY0-NEXT: [[TMP2477:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2477]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2478:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2479:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3631:%.*]] = icmp ult i32 [[TMP2478]], [[TMP2479]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3631]], label [[IF_THEN3633:%.*]], label [[IF_END3634:%.*]] +// SIMD-ONLY0: if.then3633: +// SIMD-ONLY0-NEXT: [[TMP2480:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2480]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3634]] +// SIMD-ONLY0: if.end3634: +// SIMD-ONLY0-NEXT: [[TMP2481:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2481]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2482:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2483:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3635:%.*]] = icmp eq i32 [[TMP2482]], [[TMP2483]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3635]], label [[IF_THEN3637:%.*]], label [[IF_END3638:%.*]] +// SIMD-ONLY0: if.then3637: +// SIMD-ONLY0-NEXT: [[TMP2484:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2484]], 
ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3638]] +// SIMD-ONLY0: if.end3638: +// SIMD-ONLY0-NEXT: [[TMP2485:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2485]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2486:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2487:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3639:%.*]] = icmp eq i32 [[TMP2486]], [[TMP2487]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3639]], label [[IF_THEN3641:%.*]], label [[IF_END3642:%.*]] +// SIMD-ONLY0: if.then3641: +// SIMD-ONLY0-NEXT: [[TMP2488:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2488]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3642]] +// SIMD-ONLY0: if.end3642: +// SIMD-ONLY0-NEXT: [[TMP2489:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2489]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2490:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2491:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3643:%.*]] = icmp eq i32 [[TMP2490]], [[TMP2491]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3643]], label [[IF_THEN3645:%.*]], label [[IF_ELSE3646:%.*]] +// SIMD-ONLY0: if.then3645: +// SIMD-ONLY0-NEXT: [[TMP2492:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2492]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3647:%.*]] +// SIMD-ONLY0: if.else3646: +// SIMD-ONLY0-NEXT: [[TMP2493:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2493]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3647]] +// SIMD-ONLY0: if.end3647: +// SIMD-ONLY0-NEXT: [[TMP2494:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2495:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3648:%.*]] = icmp eq i32 [[TMP2494]], [[TMP2495]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3648]], label [[IF_THEN3650:%.*]], label [[IF_ELSE3651:%.*]] +// SIMD-ONLY0: 
if.then3650: +// SIMD-ONLY0-NEXT: [[TMP2496:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2496]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3652:%.*]] +// SIMD-ONLY0: if.else3651: +// SIMD-ONLY0-NEXT: [[TMP2497:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2497]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3652]] +// SIMD-ONLY0: if.end3652: +// SIMD-ONLY0-NEXT: [[TMP2498:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2499:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3653:%.*]] = icmp eq i32 [[TMP2498]], [[TMP2499]] +// SIMD-ONLY0-NEXT: [[CONV3654:%.*]] = zext i1 [[CMP3653]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3654]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2500:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3655:%.*]] = icmp ne i32 [[TMP2500]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3655]], label [[IF_THEN3656:%.*]], label [[IF_END3657:%.*]] +// SIMD-ONLY0: if.then3656: +// SIMD-ONLY0-NEXT: [[TMP2501:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2501]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3657]] +// SIMD-ONLY0: if.end3657: +// SIMD-ONLY0-NEXT: [[TMP2502:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2503:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3658:%.*]] = icmp eq i32 [[TMP2502]], [[TMP2503]] +// SIMD-ONLY0-NEXT: [[CONV3659:%.*]] = zext i1 [[CMP3658]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3659]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2504:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3660:%.*]] = icmp ne i32 [[TMP2504]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3660]], label [[IF_THEN3661:%.*]], label [[IF_END3662:%.*]] +// SIMD-ONLY0: if.then3661: +// SIMD-ONLY0-NEXT: [[TMP2505:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2505]], ptr [[UIX]], align 
4 +// SIMD-ONLY0-NEXT: br label [[IF_END3662]] +// SIMD-ONLY0: if.end3662: +// SIMD-ONLY0-NEXT: [[TMP2506:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2507:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3663:%.*]] = icmp eq i32 [[TMP2506]], [[TMP2507]] +// SIMD-ONLY0-NEXT: [[CONV3664:%.*]] = zext i1 [[CMP3663]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3664]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2508:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3665:%.*]] = icmp ne i32 [[TMP2508]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3665]], label [[IF_THEN3666:%.*]], label [[IF_ELSE3667:%.*]] +// SIMD-ONLY0: if.then3666: +// SIMD-ONLY0-NEXT: [[TMP2509:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2509]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3668:%.*]] +// SIMD-ONLY0: if.else3667: +// SIMD-ONLY0-NEXT: [[TMP2510:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2510]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3668]] +// SIMD-ONLY0: if.end3668: +// SIMD-ONLY0-NEXT: [[TMP2511:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2512:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3669:%.*]] = icmp eq i32 [[TMP2511]], [[TMP2512]] +// SIMD-ONLY0-NEXT: [[CONV3670:%.*]] = zext i1 [[CMP3669]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3670]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2513:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3671:%.*]] = icmp ne i32 [[TMP2513]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3671]], label [[IF_THEN3672:%.*]], label [[IF_ELSE3673:%.*]] +// SIMD-ONLY0: if.then3672: +// SIMD-ONLY0-NEXT: [[TMP2514:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2514]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3674:%.*]] +// SIMD-ONLY0: if.else3673: +// SIMD-ONLY0-NEXT: [[TMP2515:%.*]] = load i32, ptr [[UIX]], 
align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2515]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3674]] +// SIMD-ONLY0: if.end3674: +// SIMD-ONLY0-NEXT: [[TMP2516:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2516]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2517:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2518:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3675:%.*]] = icmp ugt i32 [[TMP2517]], [[TMP2518]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3675]], label [[IF_THEN3677:%.*]], label [[IF_END3678:%.*]] +// SIMD-ONLY0: if.then3677: +// SIMD-ONLY0-NEXT: [[TMP2519:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2519]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3678]] +// SIMD-ONLY0: if.end3678: +// SIMD-ONLY0-NEXT: [[TMP2520:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2520]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2521:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2522:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3679:%.*]] = icmp ugt i32 [[TMP2521]], [[TMP2522]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3679]], label [[IF_THEN3681:%.*]], label [[IF_END3682:%.*]] +// SIMD-ONLY0: if.then3681: +// SIMD-ONLY0-NEXT: [[TMP2523:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2523]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3682]] +// SIMD-ONLY0: if.end3682: +// SIMD-ONLY0-NEXT: [[TMP2524:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2524]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2525:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2526:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3683:%.*]] = icmp ult i32 [[TMP2525]], [[TMP2526]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3683]], label [[IF_THEN3685:%.*]], label [[IF_END3686:%.*]] +// SIMD-ONLY0: if.then3685: +// 
SIMD-ONLY0-NEXT: [[TMP2527:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2527]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3686]] +// SIMD-ONLY0: if.end3686: +// SIMD-ONLY0-NEXT: [[TMP2528:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2528]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2529:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2530:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3687:%.*]] = icmp ult i32 [[TMP2529]], [[TMP2530]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3687]], label [[IF_THEN3689:%.*]], label [[IF_END3690:%.*]] +// SIMD-ONLY0: if.then3689: +// SIMD-ONLY0-NEXT: [[TMP2531:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2531]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3690]] +// SIMD-ONLY0: if.end3690: +// SIMD-ONLY0-NEXT: [[TMP2532:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2532]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2533:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2534:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3691:%.*]] = icmp eq i32 [[TMP2533]], [[TMP2534]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3691]], label [[IF_THEN3693:%.*]], label [[IF_END3694:%.*]] +// SIMD-ONLY0: if.then3693: +// SIMD-ONLY0-NEXT: [[TMP2535:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2535]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3694]] +// SIMD-ONLY0: if.end3694: +// SIMD-ONLY0-NEXT: [[TMP2536:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2536]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2537:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2538:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3695:%.*]] = icmp eq i32 [[TMP2537]], [[TMP2538]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3695]], label [[IF_THEN3697:%.*]], label 
[[IF_END3698:%.*]] +// SIMD-ONLY0: if.then3697: +// SIMD-ONLY0-NEXT: [[TMP2539:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2539]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3698]] +// SIMD-ONLY0: if.end3698: +// SIMD-ONLY0-NEXT: [[TMP2540:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2541:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3699:%.*]] = icmp ugt i32 [[TMP2540]], [[TMP2541]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3699]], label [[IF_THEN3701:%.*]], label [[IF_END3702:%.*]] +// SIMD-ONLY0: if.then3701: +// SIMD-ONLY0-NEXT: [[TMP2542:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2542]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3702]] +// SIMD-ONLY0: if.end3702: +// SIMD-ONLY0-NEXT: [[TMP2543:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2543]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2544:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2545:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3703:%.*]] = icmp ugt i32 [[TMP2544]], [[TMP2545]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3703]], label [[IF_THEN3705:%.*]], label [[IF_END3706:%.*]] +// SIMD-ONLY0: if.then3705: +// SIMD-ONLY0-NEXT: [[TMP2546:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2546]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3706]] +// SIMD-ONLY0: if.end3706: +// SIMD-ONLY0-NEXT: [[TMP2547:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2547]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2548:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2549:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3707:%.*]] = icmp ult i32 [[TMP2548]], [[TMP2549]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3707]], label [[IF_THEN3709:%.*]], label [[IF_END3710:%.*]] +// SIMD-ONLY0: if.then3709: +// SIMD-ONLY0-NEXT: [[TMP2550:%.*]] = load 
i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2550]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3710]] +// SIMD-ONLY0: if.end3710: +// SIMD-ONLY0-NEXT: [[TMP2551:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2551]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2552:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2553:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3711:%.*]] = icmp ult i32 [[TMP2552]], [[TMP2553]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3711]], label [[IF_THEN3713:%.*]], label [[IF_END3714:%.*]] +// SIMD-ONLY0: if.then3713: +// SIMD-ONLY0-NEXT: [[TMP2554:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2554]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3714]] +// SIMD-ONLY0: if.end3714: +// SIMD-ONLY0-NEXT: [[TMP2555:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2555]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2556:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2557:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3715:%.*]] = icmp eq i32 [[TMP2556]], [[TMP2557]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3715]], label [[IF_THEN3717:%.*]], label [[IF_END3718:%.*]] +// SIMD-ONLY0: if.then3717: +// SIMD-ONLY0-NEXT: [[TMP2558:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2558]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3718]] +// SIMD-ONLY0: if.end3718: +// SIMD-ONLY0-NEXT: [[TMP2559:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2559]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2560:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2561:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3719:%.*]] = icmp eq i32 [[TMP2560]], [[TMP2561]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3719]], label [[IF_THEN3721:%.*]], label [[IF_END3722:%.*]] +// SIMD-ONLY0: if.then3721: 
+// SIMD-ONLY0-NEXT: [[TMP2562:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2562]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3722]] +// SIMD-ONLY0: if.end3722: +// SIMD-ONLY0-NEXT: [[TMP2563:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2563]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2564:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2565:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3723:%.*]] = icmp eq i32 [[TMP2564]], [[TMP2565]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3723]], label [[IF_THEN3725:%.*]], label [[IF_ELSE3726:%.*]] +// SIMD-ONLY0: if.then3725: +// SIMD-ONLY0-NEXT: [[TMP2566:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2566]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3727:%.*]] +// SIMD-ONLY0: if.else3726: +// SIMD-ONLY0-NEXT: [[TMP2567:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2567]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3727]] +// SIMD-ONLY0: if.end3727: +// SIMD-ONLY0-NEXT: [[TMP2568:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2569:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3728:%.*]] = icmp eq i32 [[TMP2568]], [[TMP2569]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3728]], label [[IF_THEN3730:%.*]], label [[IF_ELSE3731:%.*]] +// SIMD-ONLY0: if.then3730: +// SIMD-ONLY0-NEXT: [[TMP2570:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2570]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3732:%.*]] +// SIMD-ONLY0: if.else3731: +// SIMD-ONLY0-NEXT: [[TMP2571:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2571]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3732]] +// SIMD-ONLY0: if.end3732: +// SIMD-ONLY0-NEXT: [[TMP2572:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2573:%.*]] = load i32, ptr [[UIE]], align 
4 +// SIMD-ONLY0-NEXT: [[CMP3733:%.*]] = icmp eq i32 [[TMP2572]], [[TMP2573]] +// SIMD-ONLY0-NEXT: [[CONV3734:%.*]] = zext i1 [[CMP3733]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3734]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2574:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3735:%.*]] = icmp ne i32 [[TMP2574]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3735]], label [[IF_THEN3736:%.*]], label [[IF_END3737:%.*]] +// SIMD-ONLY0: if.then3736: +// SIMD-ONLY0-NEXT: [[TMP2575:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2575]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3737]] +// SIMD-ONLY0: if.end3737: +// SIMD-ONLY0-NEXT: [[TMP2576:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2577:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3738:%.*]] = icmp eq i32 [[TMP2576]], [[TMP2577]] +// SIMD-ONLY0-NEXT: [[CONV3739:%.*]] = zext i1 [[CMP3738]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3739]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2578:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3740:%.*]] = icmp ne i32 [[TMP2578]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3740]], label [[IF_THEN3741:%.*]], label [[IF_END3742:%.*]] +// SIMD-ONLY0: if.then3741: +// SIMD-ONLY0-NEXT: [[TMP2579:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2579]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3742]] +// SIMD-ONLY0: if.end3742: +// SIMD-ONLY0-NEXT: [[TMP2580:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2581:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3743:%.*]] = icmp eq i32 [[TMP2580]], [[TMP2581]] +// SIMD-ONLY0-NEXT: [[CONV3744:%.*]] = zext i1 [[CMP3743]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3744]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2582:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3745:%.*]] = icmp ne i32 [[TMP2582]], 0 +// 
SIMD-ONLY0-NEXT: br i1 [[TOBOOL3745]], label [[IF_THEN3746:%.*]], label [[IF_ELSE3747:%.*]] +// SIMD-ONLY0: if.then3746: +// SIMD-ONLY0-NEXT: [[TMP2583:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2583]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3748:%.*]] +// SIMD-ONLY0: if.else3747: +// SIMD-ONLY0-NEXT: [[TMP2584:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2584]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3748]] +// SIMD-ONLY0: if.end3748: +// SIMD-ONLY0-NEXT: [[TMP2585:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2586:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3749:%.*]] = icmp eq i32 [[TMP2585]], [[TMP2586]] +// SIMD-ONLY0-NEXT: [[CONV3750:%.*]] = zext i1 [[CMP3749]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3750]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2587:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3751:%.*]] = icmp ne i32 [[TMP2587]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3751]], label [[IF_THEN3752:%.*]], label [[IF_ELSE3753:%.*]] +// SIMD-ONLY0: if.then3752: +// SIMD-ONLY0-NEXT: [[TMP2588:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2588]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3754:%.*]] +// SIMD-ONLY0: if.else3753: +// SIMD-ONLY0-NEXT: [[TMP2589:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2589]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3754]] +// SIMD-ONLY0: if.end3754: +// SIMD-ONLY0-NEXT: [[TMP2590:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2590]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2591:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2592:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3755:%.*]] = icmp ugt i32 [[TMP2591]], [[TMP2592]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3755]], label [[IF_THEN3757:%.*]], 
label [[IF_END3758:%.*]] +// SIMD-ONLY0: if.then3757: +// SIMD-ONLY0-NEXT: [[TMP2593:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2593]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3758]] +// SIMD-ONLY0: if.end3758: +// SIMD-ONLY0-NEXT: [[TMP2594:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2594]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2595:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2596:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3759:%.*]] = icmp ugt i32 [[TMP2595]], [[TMP2596]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3759]], label [[IF_THEN3761:%.*]], label [[IF_END3762:%.*]] +// SIMD-ONLY0: if.then3761: +// SIMD-ONLY0-NEXT: [[TMP2597:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2597]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3762]] +// SIMD-ONLY0: if.end3762: +// SIMD-ONLY0-NEXT: [[TMP2598:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2598]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2599:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2600:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3763:%.*]] = icmp ult i32 [[TMP2599]], [[TMP2600]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3763]], label [[IF_THEN3765:%.*]], label [[IF_END3766:%.*]] +// SIMD-ONLY0: if.then3765: +// SIMD-ONLY0-NEXT: [[TMP2601:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2601]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3766]] +// SIMD-ONLY0: if.end3766: +// SIMD-ONLY0-NEXT: [[TMP2602:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2602]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2603:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2604:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3767:%.*]] = icmp ult i32 [[TMP2603]], [[TMP2604]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP3767]], label [[IF_THEN3769:%.*]], label [[IF_END3770:%.*]] +// SIMD-ONLY0: if.then3769: +// SIMD-ONLY0-NEXT: [[TMP2605:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2605]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3770]] +// SIMD-ONLY0: if.end3770: +// SIMD-ONLY0-NEXT: [[TMP2606:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2606]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2607:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2608:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3771:%.*]] = icmp eq i32 [[TMP2607]], [[TMP2608]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3771]], label [[IF_THEN3773:%.*]], label [[IF_END3774:%.*]] +// SIMD-ONLY0: if.then3773: +// SIMD-ONLY0-NEXT: [[TMP2609:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2609]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3774]] +// SIMD-ONLY0: if.end3774: +// SIMD-ONLY0-NEXT: [[TMP2610:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2610]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2611:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2612:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3775:%.*]] = icmp eq i32 [[TMP2611]], [[TMP2612]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3775]], label [[IF_THEN3777:%.*]], label [[IF_END3778:%.*]] +// SIMD-ONLY0: if.then3777: +// SIMD-ONLY0-NEXT: [[TMP2613:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2613]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3778]] +// SIMD-ONLY0: if.end3778: +// SIMD-ONLY0-NEXT: [[TMP2614:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2615:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3779:%.*]] = icmp ugt i32 [[TMP2614]], [[TMP2615]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3779]], label [[IF_THEN3781:%.*]], label [[IF_END3782:%.*]] +// SIMD-ONLY0: 
if.then3781: +// SIMD-ONLY0-NEXT: [[TMP2616:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2616]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3782]] +// SIMD-ONLY0: if.end3782: +// SIMD-ONLY0-NEXT: [[TMP2617:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2617]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2618:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2619:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3783:%.*]] = icmp ugt i32 [[TMP2618]], [[TMP2619]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3783]], label [[IF_THEN3785:%.*]], label [[IF_END3786:%.*]] +// SIMD-ONLY0: if.then3785: +// SIMD-ONLY0-NEXT: [[TMP2620:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2620]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3786]] +// SIMD-ONLY0: if.end3786: +// SIMD-ONLY0-NEXT: [[TMP2621:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2621]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2622:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2623:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3787:%.*]] = icmp ult i32 [[TMP2622]], [[TMP2623]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3787]], label [[IF_THEN3789:%.*]], label [[IF_END3790:%.*]] +// SIMD-ONLY0: if.then3789: +// SIMD-ONLY0-NEXT: [[TMP2624:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2624]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3790]] +// SIMD-ONLY0: if.end3790: +// SIMD-ONLY0-NEXT: [[TMP2625:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2625]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2626:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2627:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3791:%.*]] = icmp ult i32 [[TMP2626]], [[TMP2627]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3791]], label 
[[IF_THEN3793:%.*]], label [[IF_END3794:%.*]] +// SIMD-ONLY0: if.then3793: +// SIMD-ONLY0-NEXT: [[TMP2628:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2628]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3794]] +// SIMD-ONLY0: if.end3794: +// SIMD-ONLY0-NEXT: [[TMP2629:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2629]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2630:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2631:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3795:%.*]] = icmp eq i32 [[TMP2630]], [[TMP2631]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3795]], label [[IF_THEN3797:%.*]], label [[IF_END3798:%.*]] +// SIMD-ONLY0: if.then3797: +// SIMD-ONLY0-NEXT: [[TMP2632:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2632]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3798]] +// SIMD-ONLY0: if.end3798: +// SIMD-ONLY0-NEXT: [[TMP2633:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2633]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2634:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2635:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3799:%.*]] = icmp eq i32 [[TMP2634]], [[TMP2635]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3799]], label [[IF_THEN3801:%.*]], label [[IF_END3802:%.*]] +// SIMD-ONLY0: if.then3801: +// SIMD-ONLY0-NEXT: [[TMP2636:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2636]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3802]] +// SIMD-ONLY0: if.end3802: +// SIMD-ONLY0-NEXT: [[TMP2637:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2637]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2638:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2639:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3803:%.*]] = icmp eq i32 [[TMP2638]], [[TMP2639]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP3803]], label [[IF_THEN3805:%.*]], label [[IF_ELSE3806:%.*]] +// SIMD-ONLY0: if.then3805: +// SIMD-ONLY0-NEXT: [[TMP2640:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2640]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3807:%.*]] +// SIMD-ONLY0: if.else3806: +// SIMD-ONLY0-NEXT: [[TMP2641:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2641]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3807]] +// SIMD-ONLY0: if.end3807: +// SIMD-ONLY0-NEXT: [[TMP2642:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2643:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3808:%.*]] = icmp eq i32 [[TMP2642]], [[TMP2643]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3808]], label [[IF_THEN3810:%.*]], label [[IF_ELSE3811:%.*]] +// SIMD-ONLY0: if.then3810: +// SIMD-ONLY0-NEXT: [[TMP2644:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2644]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3812:%.*]] +// SIMD-ONLY0: if.else3811: +// SIMD-ONLY0-NEXT: [[TMP2645:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2645]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3812]] +// SIMD-ONLY0: if.end3812: +// SIMD-ONLY0-NEXT: [[TMP2646:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2647:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3813:%.*]] = icmp eq i32 [[TMP2646]], [[TMP2647]] +// SIMD-ONLY0-NEXT: [[CONV3814:%.*]] = zext i1 [[CMP3813]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3814]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2648:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3815:%.*]] = icmp ne i32 [[TMP2648]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3815]], label [[IF_THEN3816:%.*]], label [[IF_END3817:%.*]] +// SIMD-ONLY0: if.then3816: +// SIMD-ONLY0-NEXT: [[TMP2649:%.*]] = load i32, ptr [[UID]], align 4 +// 
SIMD-ONLY0-NEXT: store i32 [[TMP2649]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3817]] +// SIMD-ONLY0: if.end3817: +// SIMD-ONLY0-NEXT: [[TMP2650:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2651:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3818:%.*]] = icmp eq i32 [[TMP2650]], [[TMP2651]] +// SIMD-ONLY0-NEXT: [[CONV3819:%.*]] = zext i1 [[CMP3818]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3819]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2652:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3820:%.*]] = icmp ne i32 [[TMP2652]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3820]], label [[IF_THEN3821:%.*]], label [[IF_END3822:%.*]] +// SIMD-ONLY0: if.then3821: +// SIMD-ONLY0-NEXT: [[TMP2653:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2653]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3822]] +// SIMD-ONLY0: if.end3822: +// SIMD-ONLY0-NEXT: [[TMP2654:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2655:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3823:%.*]] = icmp eq i32 [[TMP2654]], [[TMP2655]] +// SIMD-ONLY0-NEXT: [[CONV3824:%.*]] = zext i1 [[CMP3823]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3824]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2656:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3825:%.*]] = icmp ne i32 [[TMP2656]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3825]], label [[IF_THEN3826:%.*]], label [[IF_ELSE3827:%.*]] +// SIMD-ONLY0: if.then3826: +// SIMD-ONLY0-NEXT: [[TMP2657:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2657]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3828:%.*]] +// SIMD-ONLY0: if.else3827: +// SIMD-ONLY0-NEXT: [[TMP2658:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2658]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3828]] +// SIMD-ONLY0: if.end3828: +// 
SIMD-ONLY0-NEXT: [[TMP2659:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2660:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP3829:%.*]] = icmp eq i32 [[TMP2659]], [[TMP2660]] +// SIMD-ONLY0-NEXT: [[CONV3830:%.*]] = zext i1 [[CMP3829]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV3830]], ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2661:%.*]] = load i32, ptr [[UIR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL3831:%.*]] = icmp ne i32 [[TMP2661]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3831]], label [[IF_THEN3832:%.*]], label [[IF_ELSE3833:%.*]] +// SIMD-ONLY0: if.then3832: +// SIMD-ONLY0-NEXT: [[TMP2662:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2662]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3834:%.*]] +// SIMD-ONLY0: if.else3833: +// SIMD-ONLY0-NEXT: [[TMP2663:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP2663]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END3834]] +// SIMD-ONLY0: if.end3834: +// SIMD-ONLY0-NEXT: [[TMP2664:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2664]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2665:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2666:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3835:%.*]] = icmp sgt i64 [[TMP2665]], [[TMP2666]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3835]], label [[IF_THEN3837:%.*]], label [[IF_END3838:%.*]] +// SIMD-ONLY0: if.then3837: +// SIMD-ONLY0-NEXT: [[TMP2667:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2667]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3838]] +// SIMD-ONLY0: if.end3838: +// SIMD-ONLY0-NEXT: [[TMP2668:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2668]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2669:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2670:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[CMP3839:%.*]] = icmp sgt i64 [[TMP2669]], [[TMP2670]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3839]], label [[IF_THEN3841:%.*]], label [[IF_END3842:%.*]] +// SIMD-ONLY0: if.then3841: +// SIMD-ONLY0-NEXT: [[TMP2671:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2671]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3842]] +// SIMD-ONLY0: if.end3842: +// SIMD-ONLY0-NEXT: [[TMP2672:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2672]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2673:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2674:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3843:%.*]] = icmp slt i64 [[TMP2673]], [[TMP2674]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3843]], label [[IF_THEN3845:%.*]], label [[IF_END3846:%.*]] +// SIMD-ONLY0: if.then3845: +// SIMD-ONLY0-NEXT: [[TMP2675:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2675]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3846]] +// SIMD-ONLY0: if.end3846: +// SIMD-ONLY0-NEXT: [[TMP2676:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2676]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2677:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2678:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3847:%.*]] = icmp slt i64 [[TMP2677]], [[TMP2678]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3847]], label [[IF_THEN3849:%.*]], label [[IF_END3850:%.*]] +// SIMD-ONLY0: if.then3849: +// SIMD-ONLY0-NEXT: [[TMP2679:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2679]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3850]] +// SIMD-ONLY0: if.end3850: +// SIMD-ONLY0-NEXT: [[TMP2680:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2680]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2681:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2682:%.*]] = load i64, ptr [[LE]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP3851:%.*]] = icmp eq i64 [[TMP2681]], [[TMP2682]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3851]], label [[IF_THEN3853:%.*]], label [[IF_END3854:%.*]] +// SIMD-ONLY0: if.then3853: +// SIMD-ONLY0-NEXT: [[TMP2683:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2683]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3854]] +// SIMD-ONLY0: if.end3854: +// SIMD-ONLY0-NEXT: [[TMP2684:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2684]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2685:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2686:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3855:%.*]] = icmp eq i64 [[TMP2685]], [[TMP2686]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3855]], label [[IF_THEN3857:%.*]], label [[IF_END3858:%.*]] +// SIMD-ONLY0: if.then3857: +// SIMD-ONLY0-NEXT: [[TMP2687:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2687]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3858]] +// SIMD-ONLY0: if.end3858: +// SIMD-ONLY0-NEXT: [[TMP2688:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2689:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3859:%.*]] = icmp sgt i64 [[TMP2688]], [[TMP2689]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3859]], label [[IF_THEN3861:%.*]], label [[IF_END3862:%.*]] +// SIMD-ONLY0: if.then3861: +// SIMD-ONLY0-NEXT: [[TMP2690:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2690]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3862]] +// SIMD-ONLY0: if.end3862: +// SIMD-ONLY0-NEXT: [[TMP2691:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2691]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2692:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2693:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3863:%.*]] = icmp sgt i64 [[TMP2692]], [[TMP2693]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP3863]], label [[IF_THEN3865:%.*]], label [[IF_END3866:%.*]] +// SIMD-ONLY0: if.then3865: +// SIMD-ONLY0-NEXT: [[TMP2694:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2694]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3866]] +// SIMD-ONLY0: if.end3866: +// SIMD-ONLY0-NEXT: [[TMP2695:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2695]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2696:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2697:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3867:%.*]] = icmp slt i64 [[TMP2696]], [[TMP2697]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3867]], label [[IF_THEN3869:%.*]], label [[IF_END3870:%.*]] +// SIMD-ONLY0: if.then3869: +// SIMD-ONLY0-NEXT: [[TMP2698:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2698]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3870]] +// SIMD-ONLY0: if.end3870: +// SIMD-ONLY0-NEXT: [[TMP2699:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2699]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2700:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2701:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3871:%.*]] = icmp slt i64 [[TMP2700]], [[TMP2701]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3871]], label [[IF_THEN3873:%.*]], label [[IF_END3874:%.*]] +// SIMD-ONLY0: if.then3873: +// SIMD-ONLY0-NEXT: [[TMP2702:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2702]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3874]] +// SIMD-ONLY0: if.end3874: +// SIMD-ONLY0-NEXT: [[TMP2703:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2703]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2704:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2705:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3875:%.*]] = icmp eq i64 [[TMP2704]], [[TMP2705]] 
+// SIMD-ONLY0-NEXT: br i1 [[CMP3875]], label [[IF_THEN3877:%.*]], label [[IF_END3878:%.*]] +// SIMD-ONLY0: if.then3877: +// SIMD-ONLY0-NEXT: [[TMP2706:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2706]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3878]] +// SIMD-ONLY0: if.end3878: +// SIMD-ONLY0-NEXT: [[TMP2707:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2707]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2708:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2709:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3879:%.*]] = icmp eq i64 [[TMP2708]], [[TMP2709]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3879]], label [[IF_THEN3881:%.*]], label [[IF_END3882:%.*]] +// SIMD-ONLY0: if.then3881: +// SIMD-ONLY0-NEXT: [[TMP2710:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2710]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3882]] +// SIMD-ONLY0: if.end3882: +// SIMD-ONLY0-NEXT: [[TMP2711:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2711]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2712:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2713:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3883:%.*]] = icmp eq i64 [[TMP2712]], [[TMP2713]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3883]], label [[IF_THEN3885:%.*]], label [[IF_ELSE3886:%.*]] +// SIMD-ONLY0: if.then3885: +// SIMD-ONLY0-NEXT: [[TMP2714:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2714]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3887:%.*]] +// SIMD-ONLY0: if.else3886: +// SIMD-ONLY0-NEXT: [[TMP2715:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2715]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3887]] +// SIMD-ONLY0: if.end3887: +// SIMD-ONLY0-NEXT: [[TMP2716:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2717:%.*]] = load 
i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3888:%.*]] = icmp eq i64 [[TMP2716]], [[TMP2717]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3888]], label [[IF_THEN3890:%.*]], label [[IF_ELSE3891:%.*]] +// SIMD-ONLY0: if.then3890: +// SIMD-ONLY0-NEXT: [[TMP2718:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2718]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3892:%.*]] +// SIMD-ONLY0: if.else3891: +// SIMD-ONLY0-NEXT: [[TMP2719:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2719]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3892]] +// SIMD-ONLY0: if.end3892: +// SIMD-ONLY0-NEXT: [[TMP2720:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2721:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3893:%.*]] = icmp eq i64 [[TMP2720]], [[TMP2721]] +// SIMD-ONLY0-NEXT: [[CONV3894:%.*]] = zext i1 [[CMP3893]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3895:%.*]] = sext i32 [[CONV3894]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3895]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2722:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3896:%.*]] = icmp ne i64 [[TMP2722]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3896]], label [[IF_THEN3897:%.*]], label [[IF_END3898:%.*]] +// SIMD-ONLY0: if.then3897: +// SIMD-ONLY0-NEXT: [[TMP2723:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2723]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3898]] +// SIMD-ONLY0: if.end3898: +// SIMD-ONLY0-NEXT: [[TMP2724:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2725:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3899:%.*]] = icmp eq i64 [[TMP2724]], [[TMP2725]] +// SIMD-ONLY0-NEXT: [[CONV3900:%.*]] = zext i1 [[CMP3899]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3901:%.*]] = sext i32 [[CONV3900]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3901]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2726:%.*]] = load i64, ptr [[LR]], 
align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3902:%.*]] = icmp ne i64 [[TMP2726]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3902]], label [[IF_THEN3903:%.*]], label [[IF_END3904:%.*]] +// SIMD-ONLY0: if.then3903: +// SIMD-ONLY0-NEXT: [[TMP2727:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2727]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3904]] +// SIMD-ONLY0: if.end3904: +// SIMD-ONLY0-NEXT: [[TMP2728:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2729:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3905:%.*]] = icmp eq i64 [[TMP2728]], [[TMP2729]] +// SIMD-ONLY0-NEXT: [[CONV3906:%.*]] = zext i1 [[CMP3905]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3907:%.*]] = sext i32 [[CONV3906]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3907]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2730:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3908:%.*]] = icmp ne i64 [[TMP2730]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3908]], label [[IF_THEN3909:%.*]], label [[IF_ELSE3910:%.*]] +// SIMD-ONLY0: if.then3909: +// SIMD-ONLY0-NEXT: [[TMP2731:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2731]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3911:%.*]] +// SIMD-ONLY0: if.else3910: +// SIMD-ONLY0-NEXT: [[TMP2732:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2732]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3911]] +// SIMD-ONLY0: if.end3911: +// SIMD-ONLY0-NEXT: [[TMP2733:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2734:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3912:%.*]] = icmp eq i64 [[TMP2733]], [[TMP2734]] +// SIMD-ONLY0-NEXT: [[CONV3913:%.*]] = zext i1 [[CMP3912]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3914:%.*]] = sext i32 [[CONV3913]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3914]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2735:%.*]] = load i64, ptr [[LR]], align 8 +// 
SIMD-ONLY0-NEXT: [[TOBOOL3915:%.*]] = icmp ne i64 [[TMP2735]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3915]], label [[IF_THEN3916:%.*]], label [[IF_ELSE3917:%.*]] +// SIMD-ONLY0: if.then3916: +// SIMD-ONLY0-NEXT: [[TMP2736:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2736]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3918:%.*]] +// SIMD-ONLY0: if.else3917: +// SIMD-ONLY0-NEXT: [[TMP2737:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2737]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3918]] +// SIMD-ONLY0: if.end3918: +// SIMD-ONLY0-NEXT: [[TMP2738:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2738]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2739:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2740:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3919:%.*]] = icmp sgt i64 [[TMP2739]], [[TMP2740]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3919]], label [[IF_THEN3921:%.*]], label [[IF_END3922:%.*]] +// SIMD-ONLY0: if.then3921: +// SIMD-ONLY0-NEXT: [[TMP2741:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2741]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3922]] +// SIMD-ONLY0: if.end3922: +// SIMD-ONLY0-NEXT: [[TMP2742:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2742]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2743:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2744:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3923:%.*]] = icmp sgt i64 [[TMP2743]], [[TMP2744]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3923]], label [[IF_THEN3925:%.*]], label [[IF_END3926:%.*]] +// SIMD-ONLY0: if.then3925: +// SIMD-ONLY0-NEXT: [[TMP2745:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2745]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3926]] +// SIMD-ONLY0: if.end3926: +// SIMD-ONLY0-NEXT: [[TMP2746:%.*]] = 
load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2746]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2747:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2748:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3927:%.*]] = icmp slt i64 [[TMP2747]], [[TMP2748]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3927]], label [[IF_THEN3929:%.*]], label [[IF_END3930:%.*]] +// SIMD-ONLY0: if.then3929: +// SIMD-ONLY0-NEXT: [[TMP2749:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2749]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3930]] +// SIMD-ONLY0: if.end3930: +// SIMD-ONLY0-NEXT: [[TMP2750:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2750]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2751:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2752:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3931:%.*]] = icmp slt i64 [[TMP2751]], [[TMP2752]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3931]], label [[IF_THEN3933:%.*]], label [[IF_END3934:%.*]] +// SIMD-ONLY0: if.then3933: +// SIMD-ONLY0-NEXT: [[TMP2753:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2753]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3934]] +// SIMD-ONLY0: if.end3934: +// SIMD-ONLY0-NEXT: [[TMP2754:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2754]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2755:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2756:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3935:%.*]] = icmp eq i64 [[TMP2755]], [[TMP2756]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3935]], label [[IF_THEN3937:%.*]], label [[IF_END3938:%.*]] +// SIMD-ONLY0: if.then3937: +// SIMD-ONLY0-NEXT: [[TMP2757:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2757]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3938]] +// SIMD-ONLY0: if.end3938: +// 
SIMD-ONLY0-NEXT: [[TMP2758:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2758]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2759:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2760:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3939:%.*]] = icmp eq i64 [[TMP2759]], [[TMP2760]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3939]], label [[IF_THEN3941:%.*]], label [[IF_END3942:%.*]] +// SIMD-ONLY0: if.then3941: +// SIMD-ONLY0-NEXT: [[TMP2761:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2761]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3942]] +// SIMD-ONLY0: if.end3942: +// SIMD-ONLY0-NEXT: [[TMP2762:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2763:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3943:%.*]] = icmp sgt i64 [[TMP2762]], [[TMP2763]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3943]], label [[IF_THEN3945:%.*]], label [[IF_END3946:%.*]] +// SIMD-ONLY0: if.then3945: +// SIMD-ONLY0-NEXT: [[TMP2764:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2764]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3946]] +// SIMD-ONLY0: if.end3946: +// SIMD-ONLY0-NEXT: [[TMP2765:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2765]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2766:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2767:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3947:%.*]] = icmp sgt i64 [[TMP2766]], [[TMP2767]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3947]], label [[IF_THEN3949:%.*]], label [[IF_END3950:%.*]] +// SIMD-ONLY0: if.then3949: +// SIMD-ONLY0-NEXT: [[TMP2768:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2768]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3950]] +// SIMD-ONLY0: if.end3950: +// SIMD-ONLY0-NEXT: [[TMP2769:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2769]], 
ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2770:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2771:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3951:%.*]] = icmp slt i64 [[TMP2770]], [[TMP2771]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3951]], label [[IF_THEN3953:%.*]], label [[IF_END3954:%.*]] +// SIMD-ONLY0: if.then3953: +// SIMD-ONLY0-NEXT: [[TMP2772:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2772]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3954]] +// SIMD-ONLY0: if.end3954: +// SIMD-ONLY0-NEXT: [[TMP2773:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2773]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2774:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2775:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3955:%.*]] = icmp slt i64 [[TMP2774]], [[TMP2775]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3955]], label [[IF_THEN3957:%.*]], label [[IF_END3958:%.*]] +// SIMD-ONLY0: if.then3957: +// SIMD-ONLY0-NEXT: [[TMP2776:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2776]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3958]] +// SIMD-ONLY0: if.end3958: +// SIMD-ONLY0-NEXT: [[TMP2777:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2777]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2778:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2779:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3959:%.*]] = icmp eq i64 [[TMP2778]], [[TMP2779]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3959]], label [[IF_THEN3961:%.*]], label [[IF_END3962:%.*]] +// SIMD-ONLY0: if.then3961: +// SIMD-ONLY0-NEXT: [[TMP2780:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2780]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3962]] +// SIMD-ONLY0: if.end3962: +// SIMD-ONLY0-NEXT: [[TMP2781:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP2781]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2782:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2783:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3963:%.*]] = icmp eq i64 [[TMP2782]], [[TMP2783]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3963]], label [[IF_THEN3965:%.*]], label [[IF_END3966:%.*]] +// SIMD-ONLY0: if.then3965: +// SIMD-ONLY0-NEXT: [[TMP2784:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2784]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3966]] +// SIMD-ONLY0: if.end3966: +// SIMD-ONLY0-NEXT: [[TMP2785:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2785]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2786:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2787:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3967:%.*]] = icmp eq i64 [[TMP2786]], [[TMP2787]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3967]], label [[IF_THEN3969:%.*]], label [[IF_ELSE3970:%.*]] +// SIMD-ONLY0: if.then3969: +// SIMD-ONLY0-NEXT: [[TMP2788:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2788]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3971:%.*]] +// SIMD-ONLY0: if.else3970: +// SIMD-ONLY0-NEXT: [[TMP2789:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2789]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3971]] +// SIMD-ONLY0: if.end3971: +// SIMD-ONLY0-NEXT: [[TMP2790:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2791:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3972:%.*]] = icmp eq i64 [[TMP2790]], [[TMP2791]] +// SIMD-ONLY0-NEXT: br i1 [[CMP3972]], label [[IF_THEN3974:%.*]], label [[IF_ELSE3975:%.*]] +// SIMD-ONLY0: if.then3974: +// SIMD-ONLY0-NEXT: [[TMP2792:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2792]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[IF_END3976:%.*]] +// SIMD-ONLY0: if.else3975: +// SIMD-ONLY0-NEXT: [[TMP2793:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2793]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3976]] +// SIMD-ONLY0: if.end3976: +// SIMD-ONLY0-NEXT: [[TMP2794:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2795:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3977:%.*]] = icmp eq i64 [[TMP2794]], [[TMP2795]] +// SIMD-ONLY0-NEXT: [[CONV3978:%.*]] = zext i1 [[CMP3977]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3979:%.*]] = sext i32 [[CONV3978]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3979]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2796:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3980:%.*]] = icmp ne i64 [[TMP2796]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3980]], label [[IF_THEN3981:%.*]], label [[IF_END3982:%.*]] +// SIMD-ONLY0: if.then3981: +// SIMD-ONLY0-NEXT: [[TMP2797:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2797]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3982]] +// SIMD-ONLY0: if.end3982: +// SIMD-ONLY0-NEXT: [[TMP2798:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2799:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3983:%.*]] = icmp eq i64 [[TMP2798]], [[TMP2799]] +// SIMD-ONLY0-NEXT: [[CONV3984:%.*]] = zext i1 [[CMP3983]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3985:%.*]] = sext i32 [[CONV3984]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3985]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2800:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3986:%.*]] = icmp ne i64 [[TMP2800]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3986]], label [[IF_THEN3987:%.*]], label [[IF_END3988:%.*]] +// SIMD-ONLY0: if.then3987: +// SIMD-ONLY0-NEXT: [[TMP2801:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2801]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3988]] +// 
SIMD-ONLY0: if.end3988: +// SIMD-ONLY0-NEXT: [[TMP2802:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2803:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3989:%.*]] = icmp eq i64 [[TMP2802]], [[TMP2803]] +// SIMD-ONLY0-NEXT: [[CONV3990:%.*]] = zext i1 [[CMP3989]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3991:%.*]] = sext i32 [[CONV3990]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3991]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2804:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3992:%.*]] = icmp ne i64 [[TMP2804]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3992]], label [[IF_THEN3993:%.*]], label [[IF_ELSE3994:%.*]] +// SIMD-ONLY0: if.then3993: +// SIMD-ONLY0-NEXT: [[TMP2805:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2805]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3995:%.*]] +// SIMD-ONLY0: if.else3994: +// SIMD-ONLY0-NEXT: [[TMP2806:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2806]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END3995]] +// SIMD-ONLY0: if.end3995: +// SIMD-ONLY0-NEXT: [[TMP2807:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2808:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP3996:%.*]] = icmp eq i64 [[TMP2807]], [[TMP2808]] +// SIMD-ONLY0-NEXT: [[CONV3997:%.*]] = zext i1 [[CMP3996]] to i32 +// SIMD-ONLY0-NEXT: [[CONV3998:%.*]] = sext i32 [[CONV3997]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV3998]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2809:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL3999:%.*]] = icmp ne i64 [[TMP2809]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL3999]], label [[IF_THEN4000:%.*]], label [[IF_ELSE4001:%.*]] +// SIMD-ONLY0: if.then4000: +// SIMD-ONLY0-NEXT: [[TMP2810:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2810]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4002:%.*]] +// SIMD-ONLY0: 
if.else4001: +// SIMD-ONLY0-NEXT: [[TMP2811:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2811]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4002]] +// SIMD-ONLY0: if.end4002: +// SIMD-ONLY0-NEXT: [[TMP2812:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2812]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2813:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2814:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4003:%.*]] = icmp sgt i64 [[TMP2813]], [[TMP2814]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4003]], label [[IF_THEN4005:%.*]], label [[IF_END4006:%.*]] +// SIMD-ONLY0: if.then4005: +// SIMD-ONLY0-NEXT: [[TMP2815:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2815]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4006]] +// SIMD-ONLY0: if.end4006: +// SIMD-ONLY0-NEXT: [[TMP2816:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2816]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2817:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2818:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4007:%.*]] = icmp sgt i64 [[TMP2817]], [[TMP2818]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4007]], label [[IF_THEN4009:%.*]], label [[IF_END4010:%.*]] +// SIMD-ONLY0: if.then4009: +// SIMD-ONLY0-NEXT: [[TMP2819:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2819]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4010]] +// SIMD-ONLY0: if.end4010: +// SIMD-ONLY0-NEXT: [[TMP2820:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2820]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2821:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2822:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4011:%.*]] = icmp slt i64 [[TMP2821]], [[TMP2822]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4011]], label [[IF_THEN4013:%.*]], label 
[[IF_END4014:%.*]] +// SIMD-ONLY0: if.then4013: +// SIMD-ONLY0-NEXT: [[TMP2823:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2823]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4014]] +// SIMD-ONLY0: if.end4014: +// SIMD-ONLY0-NEXT: [[TMP2824:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2824]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2825:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2826:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4015:%.*]] = icmp slt i64 [[TMP2825]], [[TMP2826]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4015]], label [[IF_THEN4017:%.*]], label [[IF_END4018:%.*]] +// SIMD-ONLY0: if.then4017: +// SIMD-ONLY0-NEXT: [[TMP2827:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2827]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4018]] +// SIMD-ONLY0: if.end4018: +// SIMD-ONLY0-NEXT: [[TMP2828:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2828]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2829:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2830:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4019:%.*]] = icmp eq i64 [[TMP2829]], [[TMP2830]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4019]], label [[IF_THEN4021:%.*]], label [[IF_END4022:%.*]] +// SIMD-ONLY0: if.then4021: +// SIMD-ONLY0-NEXT: [[TMP2831:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2831]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4022]] +// SIMD-ONLY0: if.end4022: +// SIMD-ONLY0-NEXT: [[TMP2832:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2832]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2833:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2834:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4023:%.*]] = icmp eq i64 [[TMP2833]], [[TMP2834]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4023]], label 
[[IF_THEN4025:%.*]], label [[IF_END4026:%.*]] +// SIMD-ONLY0: if.then4025: +// SIMD-ONLY0-NEXT: [[TMP2835:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2835]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4026]] +// SIMD-ONLY0: if.end4026: +// SIMD-ONLY0-NEXT: [[TMP2836:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2837:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4027:%.*]] = icmp sgt i64 [[TMP2836]], [[TMP2837]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4027]], label [[IF_THEN4029:%.*]], label [[IF_END4030:%.*]] +// SIMD-ONLY0: if.then4029: +// SIMD-ONLY0-NEXT: [[TMP2838:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2838]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4030]] +// SIMD-ONLY0: if.end4030: +// SIMD-ONLY0-NEXT: [[TMP2839:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2839]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2840:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2841:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4031:%.*]] = icmp sgt i64 [[TMP2840]], [[TMP2841]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4031]], label [[IF_THEN4033:%.*]], label [[IF_END4034:%.*]] +// SIMD-ONLY0: if.then4033: +// SIMD-ONLY0-NEXT: [[TMP2842:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2842]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4034]] +// SIMD-ONLY0: if.end4034: +// SIMD-ONLY0-NEXT: [[TMP2843:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2843]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2844:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2845:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4035:%.*]] = icmp slt i64 [[TMP2844]], [[TMP2845]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4035]], label [[IF_THEN4037:%.*]], label [[IF_END4038:%.*]] +// SIMD-ONLY0: if.then4037: +// SIMD-ONLY0-NEXT: 
[[TMP2846:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2846]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4038]] +// SIMD-ONLY0: if.end4038: +// SIMD-ONLY0-NEXT: [[TMP2847:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2847]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2848:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2849:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4039:%.*]] = icmp slt i64 [[TMP2848]], [[TMP2849]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4039]], label [[IF_THEN4041:%.*]], label [[IF_END4042:%.*]] +// SIMD-ONLY0: if.then4041: +// SIMD-ONLY0-NEXT: [[TMP2850:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2850]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4042]] +// SIMD-ONLY0: if.end4042: +// SIMD-ONLY0-NEXT: [[TMP2851:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2851]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2852:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2853:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4043:%.*]] = icmp eq i64 [[TMP2852]], [[TMP2853]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4043]], label [[IF_THEN4045:%.*]], label [[IF_END4046:%.*]] +// SIMD-ONLY0: if.then4045: +// SIMD-ONLY0-NEXT: [[TMP2854:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2854]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4046]] +// SIMD-ONLY0: if.end4046: +// SIMD-ONLY0-NEXT: [[TMP2855:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2855]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2856:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2857:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4047:%.*]] = icmp eq i64 [[TMP2856]], [[TMP2857]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4047]], label [[IF_THEN4049:%.*]], label [[IF_END4050:%.*]] +// SIMD-ONLY0: 
if.then4049: +// SIMD-ONLY0-NEXT: [[TMP2858:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2858]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4050]] +// SIMD-ONLY0: if.end4050: +// SIMD-ONLY0-NEXT: [[TMP2859:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2859]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2860:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2861:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4051:%.*]] = icmp eq i64 [[TMP2860]], [[TMP2861]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4051]], label [[IF_THEN4053:%.*]], label [[IF_ELSE4054:%.*]] +// SIMD-ONLY0: if.then4053: +// SIMD-ONLY0-NEXT: [[TMP2862:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2862]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4055:%.*]] +// SIMD-ONLY0: if.else4054: +// SIMD-ONLY0-NEXT: [[TMP2863:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2863]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4055]] +// SIMD-ONLY0: if.end4055: +// SIMD-ONLY0-NEXT: [[TMP2864:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2865:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4056:%.*]] = icmp eq i64 [[TMP2864]], [[TMP2865]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4056]], label [[IF_THEN4058:%.*]], label [[IF_ELSE4059:%.*]] +// SIMD-ONLY0: if.then4058: +// SIMD-ONLY0-NEXT: [[TMP2866:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2866]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4060:%.*]] +// SIMD-ONLY0: if.else4059: +// SIMD-ONLY0-NEXT: [[TMP2867:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2867]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4060]] +// SIMD-ONLY0: if.end4060: +// SIMD-ONLY0-NEXT: [[TMP2868:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2869:%.*]] = load i64, ptr [[LE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP4061:%.*]] = icmp eq i64 [[TMP2868]], [[TMP2869]] +// SIMD-ONLY0-NEXT: [[CONV4062:%.*]] = zext i1 [[CMP4061]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4063:%.*]] = sext i32 [[CONV4062]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4063]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2870:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4064:%.*]] = icmp ne i64 [[TMP2870]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4064]], label [[IF_THEN4065:%.*]], label [[IF_END4066:%.*]] +// SIMD-ONLY0: if.then4065: +// SIMD-ONLY0-NEXT: [[TMP2871:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2871]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4066]] +// SIMD-ONLY0: if.end4066: +// SIMD-ONLY0-NEXT: [[TMP2872:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2873:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4067:%.*]] = icmp eq i64 [[TMP2872]], [[TMP2873]] +// SIMD-ONLY0-NEXT: [[CONV4068:%.*]] = zext i1 [[CMP4067]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4069:%.*]] = sext i32 [[CONV4068]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4069]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2874:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4070:%.*]] = icmp ne i64 [[TMP2874]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4070]], label [[IF_THEN4071:%.*]], label [[IF_END4072:%.*]] +// SIMD-ONLY0: if.then4071: +// SIMD-ONLY0-NEXT: [[TMP2875:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2875]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4072]] +// SIMD-ONLY0: if.end4072: +// SIMD-ONLY0-NEXT: [[TMP2876:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2877:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4073:%.*]] = icmp eq i64 [[TMP2876]], [[TMP2877]] +// SIMD-ONLY0-NEXT: [[CONV4074:%.*]] = zext i1 [[CMP4073]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4075:%.*]] = sext i32 [[CONV4074]] to i64 +// SIMD-ONLY0-NEXT: 
store i64 [[CONV4075]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2878:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4076:%.*]] = icmp ne i64 [[TMP2878]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4076]], label [[IF_THEN4077:%.*]], label [[IF_ELSE4078:%.*]] +// SIMD-ONLY0: if.then4077: +// SIMD-ONLY0-NEXT: [[TMP2879:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2879]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4079:%.*]] +// SIMD-ONLY0: if.else4078: +// SIMD-ONLY0-NEXT: [[TMP2880:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2880]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4079]] +// SIMD-ONLY0: if.end4079: +// SIMD-ONLY0-NEXT: [[TMP2881:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2882:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4080:%.*]] = icmp eq i64 [[TMP2881]], [[TMP2882]] +// SIMD-ONLY0-NEXT: [[CONV4081:%.*]] = zext i1 [[CMP4080]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4082:%.*]] = sext i32 [[CONV4081]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4082]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2883:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4083:%.*]] = icmp ne i64 [[TMP2883]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4083]], label [[IF_THEN4084:%.*]], label [[IF_ELSE4085:%.*]] +// SIMD-ONLY0: if.then4084: +// SIMD-ONLY0-NEXT: [[TMP2884:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2884]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4086:%.*]] +// SIMD-ONLY0: if.else4085: +// SIMD-ONLY0-NEXT: [[TMP2885:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2885]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4086]] +// SIMD-ONLY0: if.end4086: +// SIMD-ONLY0-NEXT: [[TMP2886:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2886]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2887:%.*]] = load 
i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2888:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4087:%.*]] = icmp sgt i64 [[TMP2887]], [[TMP2888]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4087]], label [[IF_THEN4089:%.*]], label [[IF_END4090:%.*]] +// SIMD-ONLY0: if.then4089: +// SIMD-ONLY0-NEXT: [[TMP2889:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2889]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4090]] +// SIMD-ONLY0: if.end4090: +// SIMD-ONLY0-NEXT: [[TMP2890:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2890]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2891:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2892:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4091:%.*]] = icmp sgt i64 [[TMP2891]], [[TMP2892]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4091]], label [[IF_THEN4093:%.*]], label [[IF_END4094:%.*]] +// SIMD-ONLY0: if.then4093: +// SIMD-ONLY0-NEXT: [[TMP2893:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2893]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4094]] +// SIMD-ONLY0: if.end4094: +// SIMD-ONLY0-NEXT: [[TMP2894:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2894]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2895:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2896:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4095:%.*]] = icmp slt i64 [[TMP2895]], [[TMP2896]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4095]], label [[IF_THEN4097:%.*]], label [[IF_END4098:%.*]] +// SIMD-ONLY0: if.then4097: +// SIMD-ONLY0-NEXT: [[TMP2897:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2897]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4098]] +// SIMD-ONLY0: if.end4098: +// SIMD-ONLY0-NEXT: [[TMP2898:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2898]], ptr [[LV]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP2899:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2900:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4099:%.*]] = icmp slt i64 [[TMP2899]], [[TMP2900]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4099]], label [[IF_THEN4101:%.*]], label [[IF_END4102:%.*]] +// SIMD-ONLY0: if.then4101: +// SIMD-ONLY0-NEXT: [[TMP2901:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2901]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4102]] +// SIMD-ONLY0: if.end4102: +// SIMD-ONLY0-NEXT: [[TMP2902:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2902]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2903:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2904:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4103:%.*]] = icmp eq i64 [[TMP2903]], [[TMP2904]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4103]], label [[IF_THEN4105:%.*]], label [[IF_END4106:%.*]] +// SIMD-ONLY0: if.then4105: +// SIMD-ONLY0-NEXT: [[TMP2905:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2905]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4106]] +// SIMD-ONLY0: if.end4106: +// SIMD-ONLY0-NEXT: [[TMP2906:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2906]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2907:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2908:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4107:%.*]] = icmp eq i64 [[TMP2907]], [[TMP2908]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4107]], label [[IF_THEN4109:%.*]], label [[IF_END4110:%.*]] +// SIMD-ONLY0: if.then4109: +// SIMD-ONLY0-NEXT: [[TMP2909:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2909]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4110]] +// SIMD-ONLY0: if.end4110: +// SIMD-ONLY0-NEXT: [[TMP2910:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2911:%.*]] = load 
i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4111:%.*]] = icmp sgt i64 [[TMP2910]], [[TMP2911]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4111]], label [[IF_THEN4113:%.*]], label [[IF_END4114:%.*]] +// SIMD-ONLY0: if.then4113: +// SIMD-ONLY0-NEXT: [[TMP2912:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2912]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4114]] +// SIMD-ONLY0: if.end4114: +// SIMD-ONLY0-NEXT: [[TMP2913:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2913]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2914:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2915:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4115:%.*]] = icmp sgt i64 [[TMP2914]], [[TMP2915]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4115]], label [[IF_THEN4117:%.*]], label [[IF_END4118:%.*]] +// SIMD-ONLY0: if.then4117: +// SIMD-ONLY0-NEXT: [[TMP2916:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2916]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4118]] +// SIMD-ONLY0: if.end4118: +// SIMD-ONLY0-NEXT: [[TMP2917:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2917]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2918:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2919:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4119:%.*]] = icmp slt i64 [[TMP2918]], [[TMP2919]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4119]], label [[IF_THEN4121:%.*]], label [[IF_END4122:%.*]] +// SIMD-ONLY0: if.then4121: +// SIMD-ONLY0-NEXT: [[TMP2920:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2920]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4122]] +// SIMD-ONLY0: if.end4122: +// SIMD-ONLY0-NEXT: [[TMP2921:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2921]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2922:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP2923:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4123:%.*]] = icmp slt i64 [[TMP2922]], [[TMP2923]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4123]], label [[IF_THEN4125:%.*]], label [[IF_END4126:%.*]] +// SIMD-ONLY0: if.then4125: +// SIMD-ONLY0-NEXT: [[TMP2924:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2924]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4126]] +// SIMD-ONLY0: if.end4126: +// SIMD-ONLY0-NEXT: [[TMP2925:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2925]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2926:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2927:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4127:%.*]] = icmp eq i64 [[TMP2926]], [[TMP2927]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4127]], label [[IF_THEN4129:%.*]], label [[IF_END4130:%.*]] +// SIMD-ONLY0: if.then4129: +// SIMD-ONLY0-NEXT: [[TMP2928:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2928]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4130]] +// SIMD-ONLY0: if.end4130: +// SIMD-ONLY0-NEXT: [[TMP2929:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2929]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2930:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2931:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4131:%.*]] = icmp eq i64 [[TMP2930]], [[TMP2931]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4131]], label [[IF_THEN4133:%.*]], label [[IF_END4134:%.*]] +// SIMD-ONLY0: if.then4133: +// SIMD-ONLY0-NEXT: [[TMP2932:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2932]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4134]] +// SIMD-ONLY0: if.end4134: +// SIMD-ONLY0-NEXT: [[TMP2933:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2933]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2934:%.*]] = load i64, 
ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2935:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4135:%.*]] = icmp eq i64 [[TMP2934]], [[TMP2935]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4135]], label [[IF_THEN4137:%.*]], label [[IF_ELSE4138:%.*]] +// SIMD-ONLY0: if.then4137: +// SIMD-ONLY0-NEXT: [[TMP2936:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2936]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4139:%.*]] +// SIMD-ONLY0: if.else4138: +// SIMD-ONLY0-NEXT: [[TMP2937:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2937]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4139]] +// SIMD-ONLY0: if.end4139: +// SIMD-ONLY0-NEXT: [[TMP2938:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2939:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4140:%.*]] = icmp eq i64 [[TMP2938]], [[TMP2939]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4140]], label [[IF_THEN4142:%.*]], label [[IF_ELSE4143:%.*]] +// SIMD-ONLY0: if.then4142: +// SIMD-ONLY0-NEXT: [[TMP2940:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2940]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4144:%.*]] +// SIMD-ONLY0: if.else4143: +// SIMD-ONLY0-NEXT: [[TMP2941:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2941]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4144]] +// SIMD-ONLY0: if.end4144: +// SIMD-ONLY0-NEXT: [[TMP2942:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2943:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4145:%.*]] = icmp eq i64 [[TMP2942]], [[TMP2943]] +// SIMD-ONLY0-NEXT: [[CONV4146:%.*]] = zext i1 [[CMP4145]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4147:%.*]] = sext i32 [[CONV4146]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4147]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2944:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4148:%.*]] = icmp ne i64 
[[TMP2944]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4148]], label [[IF_THEN4149:%.*]], label [[IF_END4150:%.*]] +// SIMD-ONLY0: if.then4149: +// SIMD-ONLY0-NEXT: [[TMP2945:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2945]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4150]] +// SIMD-ONLY0: if.end4150: +// SIMD-ONLY0-NEXT: [[TMP2946:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2947:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4151:%.*]] = icmp eq i64 [[TMP2946]], [[TMP2947]] +// SIMD-ONLY0-NEXT: [[CONV4152:%.*]] = zext i1 [[CMP4151]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4153:%.*]] = sext i32 [[CONV4152]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4153]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2948:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4154:%.*]] = icmp ne i64 [[TMP2948]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4154]], label [[IF_THEN4155:%.*]], label [[IF_END4156:%.*]] +// SIMD-ONLY0: if.then4155: +// SIMD-ONLY0-NEXT: [[TMP2949:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2949]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4156]] +// SIMD-ONLY0: if.end4156: +// SIMD-ONLY0-NEXT: [[TMP2950:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2951:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4157:%.*]] = icmp eq i64 [[TMP2950]], [[TMP2951]] +// SIMD-ONLY0-NEXT: [[CONV4158:%.*]] = zext i1 [[CMP4157]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4159:%.*]] = sext i32 [[CONV4158]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4159]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2952:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4160:%.*]] = icmp ne i64 [[TMP2952]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4160]], label [[IF_THEN4161:%.*]], label [[IF_ELSE4162:%.*]] +// SIMD-ONLY0: if.then4161: +// SIMD-ONLY0-NEXT: [[TMP2953:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: 
store i64 [[TMP2953]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4163:%.*]] +// SIMD-ONLY0: if.else4162: +// SIMD-ONLY0-NEXT: [[TMP2954:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2954]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4163]] +// SIMD-ONLY0: if.end4163: +// SIMD-ONLY0-NEXT: [[TMP2955:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2956:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4164:%.*]] = icmp eq i64 [[TMP2955]], [[TMP2956]] +// SIMD-ONLY0-NEXT: [[CONV4165:%.*]] = zext i1 [[CMP4164]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4166:%.*]] = sext i32 [[CONV4165]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4166]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2957:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4167:%.*]] = icmp ne i64 [[TMP2957]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4167]], label [[IF_THEN4168:%.*]], label [[IF_ELSE4169:%.*]] +// SIMD-ONLY0: if.then4168: +// SIMD-ONLY0-NEXT: [[TMP2958:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2958]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4170:%.*]] +// SIMD-ONLY0: if.else4169: +// SIMD-ONLY0-NEXT: [[TMP2959:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2959]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4170]] +// SIMD-ONLY0: if.end4170: +// SIMD-ONLY0-NEXT: [[TMP2960:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2960]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2961:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2962:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4171:%.*]] = icmp sgt i64 [[TMP2961]], [[TMP2962]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4171]], label [[IF_THEN4173:%.*]], label [[IF_END4174:%.*]] +// SIMD-ONLY0: if.then4173: +// SIMD-ONLY0-NEXT: [[TMP2963:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP2963]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4174]] +// SIMD-ONLY0: if.end4174: +// SIMD-ONLY0-NEXT: [[TMP2964:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2964]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2965:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2966:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4175:%.*]] = icmp sgt i64 [[TMP2965]], [[TMP2966]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4175]], label [[IF_THEN4177:%.*]], label [[IF_END4178:%.*]] +// SIMD-ONLY0: if.then4177: +// SIMD-ONLY0-NEXT: [[TMP2967:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2967]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4178]] +// SIMD-ONLY0: if.end4178: +// SIMD-ONLY0-NEXT: [[TMP2968:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2968]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2969:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2970:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4179:%.*]] = icmp slt i64 [[TMP2969]], [[TMP2970]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4179]], label [[IF_THEN4181:%.*]], label [[IF_END4182:%.*]] +// SIMD-ONLY0: if.then4181: +// SIMD-ONLY0-NEXT: [[TMP2971:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2971]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4182]] +// SIMD-ONLY0: if.end4182: +// SIMD-ONLY0-NEXT: [[TMP2972:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2972]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2973:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2974:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4183:%.*]] = icmp slt i64 [[TMP2973]], [[TMP2974]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4183]], label [[IF_THEN4185:%.*]], label [[IF_END4186:%.*]] +// SIMD-ONLY0: if.then4185: +// SIMD-ONLY0-NEXT: [[TMP2975:%.*]] = load i64, ptr [[LE]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP2975]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4186]] +// SIMD-ONLY0: if.end4186: +// SIMD-ONLY0-NEXT: [[TMP2976:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2976]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2977:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2978:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4187:%.*]] = icmp eq i64 [[TMP2977]], [[TMP2978]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4187]], label [[IF_THEN4189:%.*]], label [[IF_END4190:%.*]] +// SIMD-ONLY0: if.then4189: +// SIMD-ONLY0-NEXT: [[TMP2979:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2979]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4190]] +// SIMD-ONLY0: if.end4190: +// SIMD-ONLY0-NEXT: [[TMP2980:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2980]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2981:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2982:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4191:%.*]] = icmp eq i64 [[TMP2981]], [[TMP2982]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4191]], label [[IF_THEN4193:%.*]], label [[IF_END4194:%.*]] +// SIMD-ONLY0: if.then4193: +// SIMD-ONLY0-NEXT: [[TMP2983:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2983]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4194]] +// SIMD-ONLY0: if.end4194: +// SIMD-ONLY0-NEXT: [[TMP2984:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2985:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4195:%.*]] = icmp sgt i64 [[TMP2984]], [[TMP2985]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4195]], label [[IF_THEN4197:%.*]], label [[IF_END4198:%.*]] +// SIMD-ONLY0: if.then4197: +// SIMD-ONLY0-NEXT: [[TMP2986:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2986]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4198]] +// 
SIMD-ONLY0: if.end4198: +// SIMD-ONLY0-NEXT: [[TMP2987:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2987]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2988:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2989:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4199:%.*]] = icmp sgt i64 [[TMP2988]], [[TMP2989]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4199]], label [[IF_THEN4201:%.*]], label [[IF_END4202:%.*]] +// SIMD-ONLY0: if.then4201: +// SIMD-ONLY0-NEXT: [[TMP2990:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2990]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4202]] +// SIMD-ONLY0: if.end4202: +// SIMD-ONLY0-NEXT: [[TMP2991:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2991]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2992:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2993:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4203:%.*]] = icmp slt i64 [[TMP2992]], [[TMP2993]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4203]], label [[IF_THEN4205:%.*]], label [[IF_END4206:%.*]] +// SIMD-ONLY0: if.then4205: +// SIMD-ONLY0-NEXT: [[TMP2994:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2994]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4206]] +// SIMD-ONLY0: if.end4206: +// SIMD-ONLY0-NEXT: [[TMP2995:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2995]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2996:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2997:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4207:%.*]] = icmp slt i64 [[TMP2996]], [[TMP2997]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4207]], label [[IF_THEN4209:%.*]], label [[IF_END4210:%.*]] +// SIMD-ONLY0: if.then4209: +// SIMD-ONLY0-NEXT: [[TMP2998:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2998]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br 
label [[IF_END4210]] +// SIMD-ONLY0: if.end4210: +// SIMD-ONLY0-NEXT: [[TMP2999:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP2999]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3000:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3001:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4211:%.*]] = icmp eq i64 [[TMP3000]], [[TMP3001]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4211]], label [[IF_THEN4213:%.*]], label [[IF_END4214:%.*]] +// SIMD-ONLY0: if.then4213: +// SIMD-ONLY0-NEXT: [[TMP3002:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3002]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4214]] +// SIMD-ONLY0: if.end4214: +// SIMD-ONLY0-NEXT: [[TMP3003:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3003]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3004:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3005:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4215:%.*]] = icmp eq i64 [[TMP3004]], [[TMP3005]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4215]], label [[IF_THEN4217:%.*]], label [[IF_END4218:%.*]] +// SIMD-ONLY0: if.then4217: +// SIMD-ONLY0-NEXT: [[TMP3006:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3006]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4218]] +// SIMD-ONLY0: if.end4218: +// SIMD-ONLY0-NEXT: [[TMP3007:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3007]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3008:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3009:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4219:%.*]] = icmp eq i64 [[TMP3008]], [[TMP3009]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4219]], label [[IF_THEN4221:%.*]], label [[IF_ELSE4222:%.*]] +// SIMD-ONLY0: if.then4221: +// SIMD-ONLY0-NEXT: [[TMP3010:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3010]], ptr [[LX]], align 8 
+// SIMD-ONLY0-NEXT: br label [[IF_END4223:%.*]] +// SIMD-ONLY0: if.else4222: +// SIMD-ONLY0-NEXT: [[TMP3011:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3011]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4223]] +// SIMD-ONLY0: if.end4223: +// SIMD-ONLY0-NEXT: [[TMP3012:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3013:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4224:%.*]] = icmp eq i64 [[TMP3012]], [[TMP3013]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4224]], label [[IF_THEN4226:%.*]], label [[IF_ELSE4227:%.*]] +// SIMD-ONLY0: if.then4226: +// SIMD-ONLY0-NEXT: [[TMP3014:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3014]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4228:%.*]] +// SIMD-ONLY0: if.else4227: +// SIMD-ONLY0-NEXT: [[TMP3015:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3015]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4228]] +// SIMD-ONLY0: if.end4228: +// SIMD-ONLY0-NEXT: [[TMP3016:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3017:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4229:%.*]] = icmp eq i64 [[TMP3016]], [[TMP3017]] +// SIMD-ONLY0-NEXT: [[CONV4230:%.*]] = zext i1 [[CMP4229]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4231:%.*]] = sext i32 [[CONV4230]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4231]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3018:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4232:%.*]] = icmp ne i64 [[TMP3018]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4232]], label [[IF_THEN4233:%.*]], label [[IF_END4234:%.*]] +// SIMD-ONLY0: if.then4233: +// SIMD-ONLY0-NEXT: [[TMP3019:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3019]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4234]] +// SIMD-ONLY0: if.end4234: +// SIMD-ONLY0-NEXT: [[TMP3020:%.*]] = load i64, ptr [[LE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP3021:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4235:%.*]] = icmp eq i64 [[TMP3020]], [[TMP3021]] +// SIMD-ONLY0-NEXT: [[CONV4236:%.*]] = zext i1 [[CMP4235]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4237:%.*]] = sext i32 [[CONV4236]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4237]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3022:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4238:%.*]] = icmp ne i64 [[TMP3022]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4238]], label [[IF_THEN4239:%.*]], label [[IF_END4240:%.*]] +// SIMD-ONLY0: if.then4239: +// SIMD-ONLY0-NEXT: [[TMP3023:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3023]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4240]] +// SIMD-ONLY0: if.end4240: +// SIMD-ONLY0-NEXT: [[TMP3024:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3025:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4241:%.*]] = icmp eq i64 [[TMP3024]], [[TMP3025]] +// SIMD-ONLY0-NEXT: [[CONV4242:%.*]] = zext i1 [[CMP4241]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4243:%.*]] = sext i32 [[CONV4242]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4243]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3026:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4244:%.*]] = icmp ne i64 [[TMP3026]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4244]], label [[IF_THEN4245:%.*]], label [[IF_ELSE4246:%.*]] +// SIMD-ONLY0: if.then4245: +// SIMD-ONLY0-NEXT: [[TMP3027:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3027]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4247:%.*]] +// SIMD-ONLY0: if.else4246: +// SIMD-ONLY0-NEXT: [[TMP3028:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3028]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4247]] +// SIMD-ONLY0: if.end4247: +// SIMD-ONLY0-NEXT: [[TMP3029:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3030:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4248:%.*]] = icmp eq i64 [[TMP3029]], [[TMP3030]] +// SIMD-ONLY0-NEXT: [[CONV4249:%.*]] = zext i1 [[CMP4248]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4250:%.*]] = sext i32 [[CONV4249]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4250]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3031:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4251:%.*]] = icmp ne i64 [[TMP3031]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4251]], label [[IF_THEN4252:%.*]], label [[IF_ELSE4253:%.*]] +// SIMD-ONLY0: if.then4252: +// SIMD-ONLY0-NEXT: [[TMP3032:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3032]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4254:%.*]] +// SIMD-ONLY0: if.else4253: +// SIMD-ONLY0-NEXT: [[TMP3033:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3033]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4254]] +// SIMD-ONLY0: if.end4254: +// SIMD-ONLY0-NEXT: [[TMP3034:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3034]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3035:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3036:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4255:%.*]] = icmp sgt i64 [[TMP3035]], [[TMP3036]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4255]], label [[IF_THEN4257:%.*]], label [[IF_END4258:%.*]] +// SIMD-ONLY0: if.then4257: +// SIMD-ONLY0-NEXT: [[TMP3037:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3037]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4258]] +// SIMD-ONLY0: if.end4258: +// SIMD-ONLY0-NEXT: [[TMP3038:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3038]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3039:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3040:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4259:%.*]] = icmp sgt 
i64 [[TMP3039]], [[TMP3040]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4259]], label [[IF_THEN4261:%.*]], label [[IF_END4262:%.*]] +// SIMD-ONLY0: if.then4261: +// SIMD-ONLY0-NEXT: [[TMP3041:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3041]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4262]] +// SIMD-ONLY0: if.end4262: +// SIMD-ONLY0-NEXT: [[TMP3042:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3042]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3043:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3044:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4263:%.*]] = icmp slt i64 [[TMP3043]], [[TMP3044]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4263]], label [[IF_THEN4265:%.*]], label [[IF_END4266:%.*]] +// SIMD-ONLY0: if.then4265: +// SIMD-ONLY0-NEXT: [[TMP3045:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3045]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4266]] +// SIMD-ONLY0: if.end4266: +// SIMD-ONLY0-NEXT: [[TMP3046:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3046]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3047:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3048:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4267:%.*]] = icmp slt i64 [[TMP3047]], [[TMP3048]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4267]], label [[IF_THEN4269:%.*]], label [[IF_END4270:%.*]] +// SIMD-ONLY0: if.then4269: +// SIMD-ONLY0-NEXT: [[TMP3049:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3049]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4270]] +// SIMD-ONLY0: if.end4270: +// SIMD-ONLY0-NEXT: [[TMP3050:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3050]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3051:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3052:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[CMP4271:%.*]] = icmp eq i64 [[TMP3051]], [[TMP3052]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4271]], label [[IF_THEN4273:%.*]], label [[IF_END4274:%.*]] +// SIMD-ONLY0: if.then4273: +// SIMD-ONLY0-NEXT: [[TMP3053:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3053]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4274]] +// SIMD-ONLY0: if.end4274: +// SIMD-ONLY0-NEXT: [[TMP3054:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3054]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3055:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3056:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4275:%.*]] = icmp eq i64 [[TMP3055]], [[TMP3056]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4275]], label [[IF_THEN4277:%.*]], label [[IF_END4278:%.*]] +// SIMD-ONLY0: if.then4277: +// SIMD-ONLY0-NEXT: [[TMP3057:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3057]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4278]] +// SIMD-ONLY0: if.end4278: +// SIMD-ONLY0-NEXT: [[TMP3058:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3059:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4279:%.*]] = icmp sgt i64 [[TMP3058]], [[TMP3059]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4279]], label [[IF_THEN4281:%.*]], label [[IF_END4282:%.*]] +// SIMD-ONLY0: if.then4281: +// SIMD-ONLY0-NEXT: [[TMP3060:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3060]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4282]] +// SIMD-ONLY0: if.end4282: +// SIMD-ONLY0-NEXT: [[TMP3061:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3061]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3062:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3063:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4283:%.*]] = icmp sgt i64 [[TMP3062]], [[TMP3063]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4283]], label 
[[IF_THEN4285:%.*]], label [[IF_END4286:%.*]] +// SIMD-ONLY0: if.then4285: +// SIMD-ONLY0-NEXT: [[TMP3064:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3064]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4286]] +// SIMD-ONLY0: if.end4286: +// SIMD-ONLY0-NEXT: [[TMP3065:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3065]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3066:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3067:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4287:%.*]] = icmp slt i64 [[TMP3066]], [[TMP3067]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4287]], label [[IF_THEN4289:%.*]], label [[IF_END4290:%.*]] +// SIMD-ONLY0: if.then4289: +// SIMD-ONLY0-NEXT: [[TMP3068:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3068]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4290]] +// SIMD-ONLY0: if.end4290: +// SIMD-ONLY0-NEXT: [[TMP3069:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3069]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3070:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3071:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4291:%.*]] = icmp slt i64 [[TMP3070]], [[TMP3071]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4291]], label [[IF_THEN4293:%.*]], label [[IF_END4294:%.*]] +// SIMD-ONLY0: if.then4293: +// SIMD-ONLY0-NEXT: [[TMP3072:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3072]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4294]] +// SIMD-ONLY0: if.end4294: +// SIMD-ONLY0-NEXT: [[TMP3073:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3073]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3074:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3075:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4295:%.*]] = icmp eq i64 [[TMP3074]], [[TMP3075]] +// SIMD-ONLY0-NEXT: 
br i1 [[CMP4295]], label [[IF_THEN4297:%.*]], label [[IF_END4298:%.*]] +// SIMD-ONLY0: if.then4297: +// SIMD-ONLY0-NEXT: [[TMP3076:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3076]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4298]] +// SIMD-ONLY0: if.end4298: +// SIMD-ONLY0-NEXT: [[TMP3077:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3077]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3078:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3079:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4299:%.*]] = icmp eq i64 [[TMP3078]], [[TMP3079]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4299]], label [[IF_THEN4301:%.*]], label [[IF_END4302:%.*]] +// SIMD-ONLY0: if.then4301: +// SIMD-ONLY0-NEXT: [[TMP3080:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3080]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4302]] +// SIMD-ONLY0: if.end4302: +// SIMD-ONLY0-NEXT: [[TMP3081:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3081]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3082:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3083:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4303:%.*]] = icmp eq i64 [[TMP3082]], [[TMP3083]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4303]], label [[IF_THEN4305:%.*]], label [[IF_ELSE4306:%.*]] +// SIMD-ONLY0: if.then4305: +// SIMD-ONLY0-NEXT: [[TMP3084:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3084]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4307:%.*]] +// SIMD-ONLY0: if.else4306: +// SIMD-ONLY0-NEXT: [[TMP3085:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3085]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4307]] +// SIMD-ONLY0: if.end4307: +// SIMD-ONLY0-NEXT: [[TMP3086:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3087:%.*]] = load i64, ptr [[LX]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP4308:%.*]] = icmp eq i64 [[TMP3086]], [[TMP3087]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4308]], label [[IF_THEN4310:%.*]], label [[IF_ELSE4311:%.*]] +// SIMD-ONLY0: if.then4310: +// SIMD-ONLY0-NEXT: [[TMP3088:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3088]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4312:%.*]] +// SIMD-ONLY0: if.else4311: +// SIMD-ONLY0-NEXT: [[TMP3089:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3089]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4312]] +// SIMD-ONLY0: if.end4312: +// SIMD-ONLY0-NEXT: [[TMP3090:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3091:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4313:%.*]] = icmp eq i64 [[TMP3090]], [[TMP3091]] +// SIMD-ONLY0-NEXT: [[CONV4314:%.*]] = zext i1 [[CMP4313]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4315:%.*]] = sext i32 [[CONV4314]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4315]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3092:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4316:%.*]] = icmp ne i64 [[TMP3092]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4316]], label [[IF_THEN4317:%.*]], label [[IF_END4318:%.*]] +// SIMD-ONLY0: if.then4317: +// SIMD-ONLY0-NEXT: [[TMP3093:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3093]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4318]] +// SIMD-ONLY0: if.end4318: +// SIMD-ONLY0-NEXT: [[TMP3094:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3095:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4319:%.*]] = icmp eq i64 [[TMP3094]], [[TMP3095]] +// SIMD-ONLY0-NEXT: [[CONV4320:%.*]] = zext i1 [[CMP4319]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4321:%.*]] = sext i32 [[CONV4320]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4321]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3096:%.*]] = load i64, ptr [[LR]], align 8 +// 
SIMD-ONLY0-NEXT: [[TOBOOL4322:%.*]] = icmp ne i64 [[TMP3096]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4322]], label [[IF_THEN4323:%.*]], label [[IF_END4324:%.*]] +// SIMD-ONLY0: if.then4323: +// SIMD-ONLY0-NEXT: [[TMP3097:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3097]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4324]] +// SIMD-ONLY0: if.end4324: +// SIMD-ONLY0-NEXT: [[TMP3098:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3099:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4325:%.*]] = icmp eq i64 [[TMP3098]], [[TMP3099]] +// SIMD-ONLY0-NEXT: [[CONV4326:%.*]] = zext i1 [[CMP4325]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4327:%.*]] = sext i32 [[CONV4326]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4327]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3100:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4328:%.*]] = icmp ne i64 [[TMP3100]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4328]], label [[IF_THEN4329:%.*]], label [[IF_ELSE4330:%.*]] +// SIMD-ONLY0: if.then4329: +// SIMD-ONLY0-NEXT: [[TMP3101:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3101]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4331:%.*]] +// SIMD-ONLY0: if.else4330: +// SIMD-ONLY0-NEXT: [[TMP3102:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3102]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4331]] +// SIMD-ONLY0: if.end4331: +// SIMD-ONLY0-NEXT: [[TMP3103:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3104:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4332:%.*]] = icmp eq i64 [[TMP3103]], [[TMP3104]] +// SIMD-ONLY0-NEXT: [[CONV4333:%.*]] = zext i1 [[CMP4332]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4334:%.*]] = sext i32 [[CONV4333]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4334]], ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3105:%.*]] = load i64, ptr [[LR]], align 8 +// SIMD-ONLY0-NEXT: 
[[TOBOOL4335:%.*]] = icmp ne i64 [[TMP3105]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4335]], label [[IF_THEN4336:%.*]], label [[IF_ELSE4337:%.*]] +// SIMD-ONLY0: if.then4336: +// SIMD-ONLY0-NEXT: [[TMP3106:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3106]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4338:%.*]] +// SIMD-ONLY0: if.else4337: +// SIMD-ONLY0-NEXT: [[TMP3107:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3107]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4338]] +// SIMD-ONLY0: if.end4338: +// SIMD-ONLY0-NEXT: [[TMP3108:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3108]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3109:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3110:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4339:%.*]] = icmp ugt i64 [[TMP3109]], [[TMP3110]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4339]], label [[IF_THEN4341:%.*]], label [[IF_END4342:%.*]] +// SIMD-ONLY0: if.then4341: +// SIMD-ONLY0-NEXT: [[TMP3111:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3111]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4342]] +// SIMD-ONLY0: if.end4342: +// SIMD-ONLY0-NEXT: [[TMP3112:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3112]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3113:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3114:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4343:%.*]] = icmp ugt i64 [[TMP3113]], [[TMP3114]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4343]], label [[IF_THEN4345:%.*]], label [[IF_END4346:%.*]] +// SIMD-ONLY0: if.then4345: +// SIMD-ONLY0-NEXT: [[TMP3115:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3115]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4346]] +// SIMD-ONLY0: if.end4346: +// SIMD-ONLY0-NEXT: [[TMP3116:%.*]] = load 
i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3116]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3117:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3118:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4347:%.*]] = icmp ult i64 [[TMP3117]], [[TMP3118]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4347]], label [[IF_THEN4349:%.*]], label [[IF_END4350:%.*]] +// SIMD-ONLY0: if.then4349: +// SIMD-ONLY0-NEXT: [[TMP3119:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3119]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4350]] +// SIMD-ONLY0: if.end4350: +// SIMD-ONLY0-NEXT: [[TMP3120:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3120]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3121:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3122:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4351:%.*]] = icmp ult i64 [[TMP3121]], [[TMP3122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4351]], label [[IF_THEN4353:%.*]], label [[IF_END4354:%.*]] +// SIMD-ONLY0: if.then4353: +// SIMD-ONLY0-NEXT: [[TMP3123:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3123]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4354]] +// SIMD-ONLY0: if.end4354: +// SIMD-ONLY0-NEXT: [[TMP3124:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3124]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3125:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3126:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4355:%.*]] = icmp eq i64 [[TMP3125]], [[TMP3126]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4355]], label [[IF_THEN4357:%.*]], label [[IF_END4358:%.*]] +// SIMD-ONLY0: if.then4357: +// SIMD-ONLY0-NEXT: [[TMP3127:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3127]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4358]] +// SIMD-ONLY0: if.end4358: 
+// SIMD-ONLY0-NEXT: [[TMP3128:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3128]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3129:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3130:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4359:%.*]] = icmp eq i64 [[TMP3129]], [[TMP3130]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4359]], label [[IF_THEN4361:%.*]], label [[IF_END4362:%.*]] +// SIMD-ONLY0: if.then4361: +// SIMD-ONLY0-NEXT: [[TMP3131:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3131]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4362]] +// SIMD-ONLY0: if.end4362: +// SIMD-ONLY0-NEXT: [[TMP3132:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3133:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4363:%.*]] = icmp ugt i64 [[TMP3132]], [[TMP3133]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4363]], label [[IF_THEN4365:%.*]], label [[IF_END4366:%.*]] +// SIMD-ONLY0: if.then4365: +// SIMD-ONLY0-NEXT: [[TMP3134:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3134]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4366]] +// SIMD-ONLY0: if.end4366: +// SIMD-ONLY0-NEXT: [[TMP3135:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3135]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3136:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3137:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4367:%.*]] = icmp ugt i64 [[TMP3136]], [[TMP3137]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4367]], label [[IF_THEN4369:%.*]], label [[IF_END4370:%.*]] +// SIMD-ONLY0: if.then4369: +// SIMD-ONLY0-NEXT: [[TMP3138:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3138]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4370]] +// SIMD-ONLY0: if.end4370: +// SIMD-ONLY0-NEXT: [[TMP3139:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: 
store i64 [[TMP3139]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3140:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3141:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4371:%.*]] = icmp ult i64 [[TMP3140]], [[TMP3141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4371]], label [[IF_THEN4373:%.*]], label [[IF_END4374:%.*]] +// SIMD-ONLY0: if.then4373: +// SIMD-ONLY0-NEXT: [[TMP3142:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3142]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4374]] +// SIMD-ONLY0: if.end4374: +// SIMD-ONLY0-NEXT: [[TMP3143:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3143]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3144:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3145:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4375:%.*]] = icmp ult i64 [[TMP3144]], [[TMP3145]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4375]], label [[IF_THEN4377:%.*]], label [[IF_END4378:%.*]] +// SIMD-ONLY0: if.then4377: +// SIMD-ONLY0-NEXT: [[TMP3146:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3146]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4378]] +// SIMD-ONLY0: if.end4378: +// SIMD-ONLY0-NEXT: [[TMP3147:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3147]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3148:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3149:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4379:%.*]] = icmp eq i64 [[TMP3148]], [[TMP3149]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4379]], label [[IF_THEN4381:%.*]], label [[IF_END4382:%.*]] +// SIMD-ONLY0: if.then4381: +// SIMD-ONLY0-NEXT: [[TMP3150:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3150]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4382]] +// SIMD-ONLY0: if.end4382: +// SIMD-ONLY0-NEXT: [[TMP3151:%.*]] = load i64, 
ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3151]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3152:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3153:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4383:%.*]] = icmp eq i64 [[TMP3152]], [[TMP3153]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4383]], label [[IF_THEN4385:%.*]], label [[IF_END4386:%.*]] +// SIMD-ONLY0: if.then4385: +// SIMD-ONLY0-NEXT: [[TMP3154:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3154]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4386]] +// SIMD-ONLY0: if.end4386: +// SIMD-ONLY0-NEXT: [[TMP3155:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3155]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3156:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3157:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4387:%.*]] = icmp eq i64 [[TMP3156]], [[TMP3157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4387]], label [[IF_THEN4389:%.*]], label [[IF_ELSE4390:%.*]] +// SIMD-ONLY0: if.then4389: +// SIMD-ONLY0-NEXT: [[TMP3158:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3158]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4391:%.*]] +// SIMD-ONLY0: if.else4390: +// SIMD-ONLY0-NEXT: [[TMP3159:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3159]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4391]] +// SIMD-ONLY0: if.end4391: +// SIMD-ONLY0-NEXT: [[TMP3160:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3161:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4392:%.*]] = icmp eq i64 [[TMP3160]], [[TMP3161]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4392]], label [[IF_THEN4394:%.*]], label [[IF_ELSE4395:%.*]] +// SIMD-ONLY0: if.then4394: +// SIMD-ONLY0-NEXT: [[TMP3162:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3162]], ptr [[ULX]], align 8 
+// SIMD-ONLY0-NEXT: br label [[IF_END4396:%.*]] +// SIMD-ONLY0: if.else4395: +// SIMD-ONLY0-NEXT: [[TMP3163:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3163]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4396]] +// SIMD-ONLY0: if.end4396: +// SIMD-ONLY0-NEXT: [[TMP3164:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3165:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4397:%.*]] = icmp eq i64 [[TMP3164]], [[TMP3165]] +// SIMD-ONLY0-NEXT: [[CONV4398:%.*]] = zext i1 [[CMP4397]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4399:%.*]] = sext i32 [[CONV4398]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4399]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3166:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4400:%.*]] = icmp ne i64 [[TMP3166]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4400]], label [[IF_THEN4401:%.*]], label [[IF_END4402:%.*]] +// SIMD-ONLY0: if.then4401: +// SIMD-ONLY0-NEXT: [[TMP3167:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3167]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4402]] +// SIMD-ONLY0: if.end4402: +// SIMD-ONLY0-NEXT: [[TMP3168:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3169:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4403:%.*]] = icmp eq i64 [[TMP3168]], [[TMP3169]] +// SIMD-ONLY0-NEXT: [[CONV4404:%.*]] = zext i1 [[CMP4403]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4405:%.*]] = sext i32 [[CONV4404]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4405]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3170:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4406:%.*]] = icmp ne i64 [[TMP3170]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4406]], label [[IF_THEN4407:%.*]], label [[IF_END4408:%.*]] +// SIMD-ONLY0: if.then4407: +// SIMD-ONLY0-NEXT: [[TMP3171:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3171]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4408]] +// SIMD-ONLY0: if.end4408: +// SIMD-ONLY0-NEXT: [[TMP3172:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3173:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4409:%.*]] = icmp eq i64 [[TMP3172]], [[TMP3173]] +// SIMD-ONLY0-NEXT: [[CONV4410:%.*]] = zext i1 [[CMP4409]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4411:%.*]] = sext i32 [[CONV4410]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4411]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3174:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4412:%.*]] = icmp ne i64 [[TMP3174]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4412]], label [[IF_THEN4413:%.*]], label [[IF_ELSE4414:%.*]] +// SIMD-ONLY0: if.then4413: +// SIMD-ONLY0-NEXT: [[TMP3175:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3175]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4415:%.*]] +// SIMD-ONLY0: if.else4414: +// SIMD-ONLY0-NEXT: [[TMP3176:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3176]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4415]] +// SIMD-ONLY0: if.end4415: +// SIMD-ONLY0-NEXT: [[TMP3177:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3178:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4416:%.*]] = icmp eq i64 [[TMP3177]], [[TMP3178]] +// SIMD-ONLY0-NEXT: [[CONV4417:%.*]] = zext i1 [[CMP4416]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4418:%.*]] = sext i32 [[CONV4417]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4418]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3179:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4419:%.*]] = icmp ne i64 [[TMP3179]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4419]], label [[IF_THEN4420:%.*]], label [[IF_ELSE4421:%.*]] +// SIMD-ONLY0: if.then4420: +// SIMD-ONLY0-NEXT: [[TMP3180:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3180]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4422:%.*]] +// SIMD-ONLY0: if.else4421: +// SIMD-ONLY0-NEXT: [[TMP3181:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3181]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4422]] +// SIMD-ONLY0: if.end4422: +// SIMD-ONLY0-NEXT: [[TMP3182:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3182]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3183:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3184:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4423:%.*]] = icmp ugt i64 [[TMP3183]], [[TMP3184]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4423]], label [[IF_THEN4425:%.*]], label [[IF_END4426:%.*]] +// SIMD-ONLY0: if.then4425: +// SIMD-ONLY0-NEXT: [[TMP3185:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3185]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4426]] +// SIMD-ONLY0: if.end4426: +// SIMD-ONLY0-NEXT: [[TMP3186:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3186]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3187:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3188:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4427:%.*]] = icmp ugt i64 [[TMP3187]], [[TMP3188]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4427]], label [[IF_THEN4429:%.*]], label [[IF_END4430:%.*]] +// SIMD-ONLY0: if.then4429: +// SIMD-ONLY0-NEXT: [[TMP3189:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3189]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4430]] +// SIMD-ONLY0: if.end4430: +// SIMD-ONLY0-NEXT: [[TMP3190:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3190]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3191:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3192:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4431:%.*]] = icmp ult i64 [[TMP3191]], [[TMP3192]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP4431]], label [[IF_THEN4433:%.*]], label [[IF_END4434:%.*]] +// SIMD-ONLY0: if.then4433: +// SIMD-ONLY0-NEXT: [[TMP3193:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3193]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4434]] +// SIMD-ONLY0: if.end4434: +// SIMD-ONLY0-NEXT: [[TMP3194:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3194]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3195:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3196:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4435:%.*]] = icmp ult i64 [[TMP3195]], [[TMP3196]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4435]], label [[IF_THEN4437:%.*]], label [[IF_END4438:%.*]] +// SIMD-ONLY0: if.then4437: +// SIMD-ONLY0-NEXT: [[TMP3197:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3197]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4438]] +// SIMD-ONLY0: if.end4438: +// SIMD-ONLY0-NEXT: [[TMP3198:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3198]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3199:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3200:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4439:%.*]] = icmp eq i64 [[TMP3199]], [[TMP3200]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4439]], label [[IF_THEN4441:%.*]], label [[IF_END4442:%.*]] +// SIMD-ONLY0: if.then4441: +// SIMD-ONLY0-NEXT: [[TMP3201:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3201]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4442]] +// SIMD-ONLY0: if.end4442: +// SIMD-ONLY0-NEXT: [[TMP3202:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3202]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3203:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3204:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4443:%.*]] = 
icmp eq i64 [[TMP3203]], [[TMP3204]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4443]], label [[IF_THEN4445:%.*]], label [[IF_END4446:%.*]] +// SIMD-ONLY0: if.then4445: +// SIMD-ONLY0-NEXT: [[TMP3205:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3205]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4446]] +// SIMD-ONLY0: if.end4446: +// SIMD-ONLY0-NEXT: [[TMP3206:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3207:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4447:%.*]] = icmp ugt i64 [[TMP3206]], [[TMP3207]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4447]], label [[IF_THEN4449:%.*]], label [[IF_END4450:%.*]] +// SIMD-ONLY0: if.then4449: +// SIMD-ONLY0-NEXT: [[TMP3208:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3208]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4450]] +// SIMD-ONLY0: if.end4450: +// SIMD-ONLY0-NEXT: [[TMP3209:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3209]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3210:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3211:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4451:%.*]] = icmp ugt i64 [[TMP3210]], [[TMP3211]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4451]], label [[IF_THEN4453:%.*]], label [[IF_END4454:%.*]] +// SIMD-ONLY0: if.then4453: +// SIMD-ONLY0-NEXT: [[TMP3212:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3212]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4454]] +// SIMD-ONLY0: if.end4454: +// SIMD-ONLY0-NEXT: [[TMP3213:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3213]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3214:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3215:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4455:%.*]] = icmp ult i64 [[TMP3214]], [[TMP3215]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4455]], label 
[[IF_THEN4457:%.*]], label [[IF_END4458:%.*]] +// SIMD-ONLY0: if.then4457: +// SIMD-ONLY0-NEXT: [[TMP3216:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3216]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4458]] +// SIMD-ONLY0: if.end4458: +// SIMD-ONLY0-NEXT: [[TMP3217:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3217]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3218:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3219:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4459:%.*]] = icmp ult i64 [[TMP3218]], [[TMP3219]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4459]], label [[IF_THEN4461:%.*]], label [[IF_END4462:%.*]] +// SIMD-ONLY0: if.then4461: +// SIMD-ONLY0-NEXT: [[TMP3220:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3220]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4462]] +// SIMD-ONLY0: if.end4462: +// SIMD-ONLY0-NEXT: [[TMP3221:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3221]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3222:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3223:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4463:%.*]] = icmp eq i64 [[TMP3222]], [[TMP3223]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4463]], label [[IF_THEN4465:%.*]], label [[IF_END4466:%.*]] +// SIMD-ONLY0: if.then4465: +// SIMD-ONLY0-NEXT: [[TMP3224:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3224]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4466]] +// SIMD-ONLY0: if.end4466: +// SIMD-ONLY0-NEXT: [[TMP3225:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3225]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3226:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3227:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4467:%.*]] = icmp eq i64 [[TMP3226]], [[TMP3227]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP4467]], label [[IF_THEN4469:%.*]], label [[IF_END4470:%.*]] +// SIMD-ONLY0: if.then4469: +// SIMD-ONLY0-NEXT: [[TMP3228:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3228]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4470]] +// SIMD-ONLY0: if.end4470: +// SIMD-ONLY0-NEXT: [[TMP3229:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3229]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3230:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3231:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4471:%.*]] = icmp eq i64 [[TMP3230]], [[TMP3231]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4471]], label [[IF_THEN4473:%.*]], label [[IF_ELSE4474:%.*]] +// SIMD-ONLY0: if.then4473: +// SIMD-ONLY0-NEXT: [[TMP3232:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3232]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4475:%.*]] +// SIMD-ONLY0: if.else4474: +// SIMD-ONLY0-NEXT: [[TMP3233:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3233]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4475]] +// SIMD-ONLY0: if.end4475: +// SIMD-ONLY0-NEXT: [[TMP3234:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3235:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4476:%.*]] = icmp eq i64 [[TMP3234]], [[TMP3235]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4476]], label [[IF_THEN4478:%.*]], label [[IF_ELSE4479:%.*]] +// SIMD-ONLY0: if.then4478: +// SIMD-ONLY0-NEXT: [[TMP3236:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3236]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4480:%.*]] +// SIMD-ONLY0: if.else4479: +// SIMD-ONLY0-NEXT: [[TMP3237:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3237]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4480]] +// SIMD-ONLY0: if.end4480: +// SIMD-ONLY0-NEXT: 
[[TMP3238:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3239:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4481:%.*]] = icmp eq i64 [[TMP3238]], [[TMP3239]] +// SIMD-ONLY0-NEXT: [[CONV4482:%.*]] = zext i1 [[CMP4481]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4483:%.*]] = sext i32 [[CONV4482]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4483]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3240:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4484:%.*]] = icmp ne i64 [[TMP3240]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4484]], label [[IF_THEN4485:%.*]], label [[IF_END4486:%.*]] +// SIMD-ONLY0: if.then4485: +// SIMD-ONLY0-NEXT: [[TMP3241:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3241]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4486]] +// SIMD-ONLY0: if.end4486: +// SIMD-ONLY0-NEXT: [[TMP3242:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3243:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4487:%.*]] = icmp eq i64 [[TMP3242]], [[TMP3243]] +// SIMD-ONLY0-NEXT: [[CONV4488:%.*]] = zext i1 [[CMP4487]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4489:%.*]] = sext i32 [[CONV4488]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4489]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3244:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4490:%.*]] = icmp ne i64 [[TMP3244]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4490]], label [[IF_THEN4491:%.*]], label [[IF_END4492:%.*]] +// SIMD-ONLY0: if.then4491: +// SIMD-ONLY0-NEXT: [[TMP3245:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3245]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4492]] +// SIMD-ONLY0: if.end4492: +// SIMD-ONLY0-NEXT: [[TMP3246:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3247:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4493:%.*]] = icmp eq i64 [[TMP3246]], [[TMP3247]] +// SIMD-ONLY0-NEXT: 
[[CONV4494:%.*]] = zext i1 [[CMP4493]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4495:%.*]] = sext i32 [[CONV4494]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4495]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3248:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4496:%.*]] = icmp ne i64 [[TMP3248]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4496]], label [[IF_THEN4497:%.*]], label [[IF_ELSE4498:%.*]] +// SIMD-ONLY0: if.then4497: +// SIMD-ONLY0-NEXT: [[TMP3249:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3249]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4499:%.*]] +// SIMD-ONLY0: if.else4498: +// SIMD-ONLY0-NEXT: [[TMP3250:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3250]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4499]] +// SIMD-ONLY0: if.end4499: +// SIMD-ONLY0-NEXT: [[TMP3251:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3252:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4500:%.*]] = icmp eq i64 [[TMP3251]], [[TMP3252]] +// SIMD-ONLY0-NEXT: [[CONV4501:%.*]] = zext i1 [[CMP4500]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4502:%.*]] = sext i32 [[CONV4501]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4502]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3253:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4503:%.*]] = icmp ne i64 [[TMP3253]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4503]], label [[IF_THEN4504:%.*]], label [[IF_ELSE4505:%.*]] +// SIMD-ONLY0: if.then4504: +// SIMD-ONLY0-NEXT: [[TMP3254:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3254]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4506:%.*]] +// SIMD-ONLY0: if.else4505: +// SIMD-ONLY0-NEXT: [[TMP3255:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3255]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4506]] +// SIMD-ONLY0: if.end4506: +// SIMD-ONLY0-NEXT: 
[[TMP3256:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3256]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3257:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3258:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4507:%.*]] = icmp ugt i64 [[TMP3257]], [[TMP3258]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4507]], label [[IF_THEN4509:%.*]], label [[IF_END4510:%.*]] +// SIMD-ONLY0: if.then4509: +// SIMD-ONLY0-NEXT: [[TMP3259:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3259]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4510]] +// SIMD-ONLY0: if.end4510: +// SIMD-ONLY0-NEXT: [[TMP3260:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3260]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3261:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3262:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4511:%.*]] = icmp ugt i64 [[TMP3261]], [[TMP3262]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4511]], label [[IF_THEN4513:%.*]], label [[IF_END4514:%.*]] +// SIMD-ONLY0: if.then4513: +// SIMD-ONLY0-NEXT: [[TMP3263:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3263]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4514]] +// SIMD-ONLY0: if.end4514: +// SIMD-ONLY0-NEXT: [[TMP3264:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3264]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3265:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3266:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4515:%.*]] = icmp ult i64 [[TMP3265]], [[TMP3266]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4515]], label [[IF_THEN4517:%.*]], label [[IF_END4518:%.*]] +// SIMD-ONLY0: if.then4517: +// SIMD-ONLY0-NEXT: [[TMP3267:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3267]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4518]] +// 
SIMD-ONLY0: if.end4518: +// SIMD-ONLY0-NEXT: [[TMP3268:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3268]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3269:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3270:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4519:%.*]] = icmp ult i64 [[TMP3269]], [[TMP3270]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4519]], label [[IF_THEN4521:%.*]], label [[IF_END4522:%.*]] +// SIMD-ONLY0: if.then4521: +// SIMD-ONLY0-NEXT: [[TMP3271:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3271]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4522]] +// SIMD-ONLY0: if.end4522: +// SIMD-ONLY0-NEXT: [[TMP3272:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3272]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3273:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3274:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4523:%.*]] = icmp eq i64 [[TMP3273]], [[TMP3274]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4523]], label [[IF_THEN4525:%.*]], label [[IF_END4526:%.*]] +// SIMD-ONLY0: if.then4525: +// SIMD-ONLY0-NEXT: [[TMP3275:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3275]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4526]] +// SIMD-ONLY0: if.end4526: +// SIMD-ONLY0-NEXT: [[TMP3276:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3276]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3277:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3278:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4527:%.*]] = icmp eq i64 [[TMP3277]], [[TMP3278]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4527]], label [[IF_THEN4529:%.*]], label [[IF_END4530:%.*]] +// SIMD-ONLY0: if.then4529: +// SIMD-ONLY0-NEXT: [[TMP3279:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3279]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4530]] +// SIMD-ONLY0: if.end4530: +// SIMD-ONLY0-NEXT: [[TMP3280:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3281:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4531:%.*]] = icmp ugt i64 [[TMP3280]], [[TMP3281]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4531]], label [[IF_THEN4533:%.*]], label [[IF_END4534:%.*]] +// SIMD-ONLY0: if.then4533: +// SIMD-ONLY0-NEXT: [[TMP3282:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3282]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4534]] +// SIMD-ONLY0: if.end4534: +// SIMD-ONLY0-NEXT: [[TMP3283:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3283]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3284:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3285:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4535:%.*]] = icmp ugt i64 [[TMP3284]], [[TMP3285]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4535]], label [[IF_THEN4537:%.*]], label [[IF_END4538:%.*]] +// SIMD-ONLY0: if.then4537: +// SIMD-ONLY0-NEXT: [[TMP3286:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3286]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4538]] +// SIMD-ONLY0: if.end4538: +// SIMD-ONLY0-NEXT: [[TMP3287:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3287]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3288:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3289:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4539:%.*]] = icmp ult i64 [[TMP3288]], [[TMP3289]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4539]], label [[IF_THEN4541:%.*]], label [[IF_END4542:%.*]] +// SIMD-ONLY0: if.then4541: +// SIMD-ONLY0-NEXT: [[TMP3290:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3290]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4542]] +// SIMD-ONLY0: if.end4542: +// SIMD-ONLY0-NEXT: 
[[TMP3291:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3291]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3292:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3293:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4543:%.*]] = icmp ult i64 [[TMP3292]], [[TMP3293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4543]], label [[IF_THEN4545:%.*]], label [[IF_END4546:%.*]] +// SIMD-ONLY0: if.then4545: +// SIMD-ONLY0-NEXT: [[TMP3294:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3294]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4546]] +// SIMD-ONLY0: if.end4546: +// SIMD-ONLY0-NEXT: [[TMP3295:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3295]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3296:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3297:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4547:%.*]] = icmp eq i64 [[TMP3296]], [[TMP3297]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4547]], label [[IF_THEN4549:%.*]], label [[IF_END4550:%.*]] +// SIMD-ONLY0: if.then4549: +// SIMD-ONLY0-NEXT: [[TMP3298:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3298]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4550]] +// SIMD-ONLY0: if.end4550: +// SIMD-ONLY0-NEXT: [[TMP3299:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3299]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3300:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3301:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4551:%.*]] = icmp eq i64 [[TMP3300]], [[TMP3301]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4551]], label [[IF_THEN4553:%.*]], label [[IF_END4554:%.*]] +// SIMD-ONLY0: if.then4553: +// SIMD-ONLY0-NEXT: [[TMP3302:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3302]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4554]] +// 
SIMD-ONLY0: if.end4554: +// SIMD-ONLY0-NEXT: [[TMP3303:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3303]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3304:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3305:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4555:%.*]] = icmp eq i64 [[TMP3304]], [[TMP3305]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4555]], label [[IF_THEN4557:%.*]], label [[IF_ELSE4558:%.*]] +// SIMD-ONLY0: if.then4557: +// SIMD-ONLY0-NEXT: [[TMP3306:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3306]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4559:%.*]] +// SIMD-ONLY0: if.else4558: +// SIMD-ONLY0-NEXT: [[TMP3307:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3307]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4559]] +// SIMD-ONLY0: if.end4559: +// SIMD-ONLY0-NEXT: [[TMP3308:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3309:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4560:%.*]] = icmp eq i64 [[TMP3308]], [[TMP3309]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4560]], label [[IF_THEN4562:%.*]], label [[IF_ELSE4563:%.*]] +// SIMD-ONLY0: if.then4562: +// SIMD-ONLY0-NEXT: [[TMP3310:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3310]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4564:%.*]] +// SIMD-ONLY0: if.else4563: +// SIMD-ONLY0-NEXT: [[TMP3311:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3311]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4564]] +// SIMD-ONLY0: if.end4564: +// SIMD-ONLY0-NEXT: [[TMP3312:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3313:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4565:%.*]] = icmp eq i64 [[TMP3312]], [[TMP3313]] +// SIMD-ONLY0-NEXT: [[CONV4566:%.*]] = zext i1 [[CMP4565]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4567:%.*]] = 
sext i32 [[CONV4566]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4567]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3314:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4568:%.*]] = icmp ne i64 [[TMP3314]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4568]], label [[IF_THEN4569:%.*]], label [[IF_END4570:%.*]] +// SIMD-ONLY0: if.then4569: +// SIMD-ONLY0-NEXT: [[TMP3315:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3315]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4570]] +// SIMD-ONLY0: if.end4570: +// SIMD-ONLY0-NEXT: [[TMP3316:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3317:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4571:%.*]] = icmp eq i64 [[TMP3316]], [[TMP3317]] +// SIMD-ONLY0-NEXT: [[CONV4572:%.*]] = zext i1 [[CMP4571]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4573:%.*]] = sext i32 [[CONV4572]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4573]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3318:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4574:%.*]] = icmp ne i64 [[TMP3318]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4574]], label [[IF_THEN4575:%.*]], label [[IF_END4576:%.*]] +// SIMD-ONLY0: if.then4575: +// SIMD-ONLY0-NEXT: [[TMP3319:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3319]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4576]] +// SIMD-ONLY0: if.end4576: +// SIMD-ONLY0-NEXT: [[TMP3320:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3321:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4577:%.*]] = icmp eq i64 [[TMP3320]], [[TMP3321]] +// SIMD-ONLY0-NEXT: [[CONV4578:%.*]] = zext i1 [[CMP4577]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4579:%.*]] = sext i32 [[CONV4578]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4579]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3322:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4580:%.*]] = icmp ne i64 
[[TMP3322]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4580]], label [[IF_THEN4581:%.*]], label [[IF_ELSE4582:%.*]] +// SIMD-ONLY0: if.then4581: +// SIMD-ONLY0-NEXT: [[TMP3323:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3323]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4583:%.*]] +// SIMD-ONLY0: if.else4582: +// SIMD-ONLY0-NEXT: [[TMP3324:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3324]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4583]] +// SIMD-ONLY0: if.end4583: +// SIMD-ONLY0-NEXT: [[TMP3325:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3326:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4584:%.*]] = icmp eq i64 [[TMP3325]], [[TMP3326]] +// SIMD-ONLY0-NEXT: [[CONV4585:%.*]] = zext i1 [[CMP4584]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4586:%.*]] = sext i32 [[CONV4585]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4586]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3327:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4587:%.*]] = icmp ne i64 [[TMP3327]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4587]], label [[IF_THEN4588:%.*]], label [[IF_ELSE4589:%.*]] +// SIMD-ONLY0: if.then4588: +// SIMD-ONLY0-NEXT: [[TMP3328:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3328]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4590:%.*]] +// SIMD-ONLY0: if.else4589: +// SIMD-ONLY0-NEXT: [[TMP3329:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3329]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4590]] +// SIMD-ONLY0: if.end4590: +// SIMD-ONLY0-NEXT: [[TMP3330:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3330]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3331:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3332:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4591:%.*]] = icmp ugt i64 
[[TMP3331]], [[TMP3332]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4591]], label [[IF_THEN4593:%.*]], label [[IF_END4594:%.*]] +// SIMD-ONLY0: if.then4593: +// SIMD-ONLY0-NEXT: [[TMP3333:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3333]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4594]] +// SIMD-ONLY0: if.end4594: +// SIMD-ONLY0-NEXT: [[TMP3334:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3334]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3335:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3336:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4595:%.*]] = icmp ugt i64 [[TMP3335]], [[TMP3336]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4595]], label [[IF_THEN4597:%.*]], label [[IF_END4598:%.*]] +// SIMD-ONLY0: if.then4597: +// SIMD-ONLY0-NEXT: [[TMP3337:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3337]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4598]] +// SIMD-ONLY0: if.end4598: +// SIMD-ONLY0-NEXT: [[TMP3338:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3338]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3339:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3340:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4599:%.*]] = icmp ult i64 [[TMP3339]], [[TMP3340]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4599]], label [[IF_THEN4601:%.*]], label [[IF_END4602:%.*]] +// SIMD-ONLY0: if.then4601: +// SIMD-ONLY0-NEXT: [[TMP3341:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3341]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4602]] +// SIMD-ONLY0: if.end4602: +// SIMD-ONLY0-NEXT: [[TMP3342:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3342]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3343:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3344:%.*]] = load i64, ptr [[ULE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP4603:%.*]] = icmp ult i64 [[TMP3343]], [[TMP3344]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4603]], label [[IF_THEN4605:%.*]], label [[IF_END4606:%.*]] +// SIMD-ONLY0: if.then4605: +// SIMD-ONLY0-NEXT: [[TMP3345:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3345]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4606]] +// SIMD-ONLY0: if.end4606: +// SIMD-ONLY0-NEXT: [[TMP3346:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3346]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3347:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3348:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4607:%.*]] = icmp eq i64 [[TMP3347]], [[TMP3348]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4607]], label [[IF_THEN4609:%.*]], label [[IF_END4610:%.*]] +// SIMD-ONLY0: if.then4609: +// SIMD-ONLY0-NEXT: [[TMP3349:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3349]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4610]] +// SIMD-ONLY0: if.end4610: +// SIMD-ONLY0-NEXT: [[TMP3350:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3350]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3351:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3352:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4611:%.*]] = icmp eq i64 [[TMP3351]], [[TMP3352]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4611]], label [[IF_THEN4613:%.*]], label [[IF_END4614:%.*]] +// SIMD-ONLY0: if.then4613: +// SIMD-ONLY0-NEXT: [[TMP3353:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3353]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4614]] +// SIMD-ONLY0: if.end4614: +// SIMD-ONLY0-NEXT: [[TMP3354:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3355:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4615:%.*]] = icmp ugt i64 [[TMP3354]], [[TMP3355]] +// SIMD-ONLY0-NEXT: 
br i1 [[CMP4615]], label [[IF_THEN4617:%.*]], label [[IF_END4618:%.*]] +// SIMD-ONLY0: if.then4617: +// SIMD-ONLY0-NEXT: [[TMP3356:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3356]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4618]] +// SIMD-ONLY0: if.end4618: +// SIMD-ONLY0-NEXT: [[TMP3357:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3357]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3358:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3359:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4619:%.*]] = icmp ugt i64 [[TMP3358]], [[TMP3359]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4619]], label [[IF_THEN4621:%.*]], label [[IF_END4622:%.*]] +// SIMD-ONLY0: if.then4621: +// SIMD-ONLY0-NEXT: [[TMP3360:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3360]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4622]] +// SIMD-ONLY0: if.end4622: +// SIMD-ONLY0-NEXT: [[TMP3361:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3361]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3362:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3363:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4623:%.*]] = icmp ult i64 [[TMP3362]], [[TMP3363]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4623]], label [[IF_THEN4625:%.*]], label [[IF_END4626:%.*]] +// SIMD-ONLY0: if.then4625: +// SIMD-ONLY0-NEXT: [[TMP3364:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3364]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4626]] +// SIMD-ONLY0: if.end4626: +// SIMD-ONLY0-NEXT: [[TMP3365:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3365]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3366:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3367:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4627:%.*]] = icmp ult i64 
[[TMP3366]], [[TMP3367]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4627]], label [[IF_THEN4629:%.*]], label [[IF_END4630:%.*]] +// SIMD-ONLY0: if.then4629: +// SIMD-ONLY0-NEXT: [[TMP3368:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3368]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4630]] +// SIMD-ONLY0: if.end4630: +// SIMD-ONLY0-NEXT: [[TMP3369:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3369]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3370:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3371:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4631:%.*]] = icmp eq i64 [[TMP3370]], [[TMP3371]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4631]], label [[IF_THEN4633:%.*]], label [[IF_END4634:%.*]] +// SIMD-ONLY0: if.then4633: +// SIMD-ONLY0-NEXT: [[TMP3372:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3372]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4634]] +// SIMD-ONLY0: if.end4634: +// SIMD-ONLY0-NEXT: [[TMP3373:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3373]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3374:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3375:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4635:%.*]] = icmp eq i64 [[TMP3374]], [[TMP3375]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4635]], label [[IF_THEN4637:%.*]], label [[IF_END4638:%.*]] +// SIMD-ONLY0: if.then4637: +// SIMD-ONLY0-NEXT: [[TMP3376:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3376]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4638]] +// SIMD-ONLY0: if.end4638: +// SIMD-ONLY0-NEXT: [[TMP3377:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3377]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3378:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3379:%.*]] = load i64, ptr [[ULE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP4639:%.*]] = icmp eq i64 [[TMP3378]], [[TMP3379]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4639]], label [[IF_THEN4641:%.*]], label [[IF_ELSE4642:%.*]] +// SIMD-ONLY0: if.then4641: +// SIMD-ONLY0-NEXT: [[TMP3380:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3380]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4643:%.*]] +// SIMD-ONLY0: if.else4642: +// SIMD-ONLY0-NEXT: [[TMP3381:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3381]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4643]] +// SIMD-ONLY0: if.end4643: +// SIMD-ONLY0-NEXT: [[TMP3382:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3383:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4644:%.*]] = icmp eq i64 [[TMP3382]], [[TMP3383]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4644]], label [[IF_THEN4646:%.*]], label [[IF_ELSE4647:%.*]] +// SIMD-ONLY0: if.then4646: +// SIMD-ONLY0-NEXT: [[TMP3384:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3384]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4648:%.*]] +// SIMD-ONLY0: if.else4647: +// SIMD-ONLY0-NEXT: [[TMP3385:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3385]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4648]] +// SIMD-ONLY0: if.end4648: +// SIMD-ONLY0-NEXT: [[TMP3386:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3387:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4649:%.*]] = icmp eq i64 [[TMP3386]], [[TMP3387]] +// SIMD-ONLY0-NEXT: [[CONV4650:%.*]] = zext i1 [[CMP4649]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4651:%.*]] = sext i32 [[CONV4650]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4651]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3388:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4652:%.*]] = icmp ne i64 [[TMP3388]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4652]], label 
[[IF_THEN4653:%.*]], label [[IF_END4654:%.*]] +// SIMD-ONLY0: if.then4653: +// SIMD-ONLY0-NEXT: [[TMP3389:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3389]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4654]] +// SIMD-ONLY0: if.end4654: +// SIMD-ONLY0-NEXT: [[TMP3390:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3391:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4655:%.*]] = icmp eq i64 [[TMP3390]], [[TMP3391]] +// SIMD-ONLY0-NEXT: [[CONV4656:%.*]] = zext i1 [[CMP4655]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4657:%.*]] = sext i32 [[CONV4656]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4657]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3392:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4658:%.*]] = icmp ne i64 [[TMP3392]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4658]], label [[IF_THEN4659:%.*]], label [[IF_END4660:%.*]] +// SIMD-ONLY0: if.then4659: +// SIMD-ONLY0-NEXT: [[TMP3393:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3393]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4660]] +// SIMD-ONLY0: if.end4660: +// SIMD-ONLY0-NEXT: [[TMP3394:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3395:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4661:%.*]] = icmp eq i64 [[TMP3394]], [[TMP3395]] +// SIMD-ONLY0-NEXT: [[CONV4662:%.*]] = zext i1 [[CMP4661]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4663:%.*]] = sext i32 [[CONV4662]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4663]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3396:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4664:%.*]] = icmp ne i64 [[TMP3396]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4664]], label [[IF_THEN4665:%.*]], label [[IF_ELSE4666:%.*]] +// SIMD-ONLY0: if.then4665: +// SIMD-ONLY0-NEXT: [[TMP3397:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3397]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4667:%.*]] +// SIMD-ONLY0: if.else4666: +// SIMD-ONLY0-NEXT: [[TMP3398:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3398]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4667]] +// SIMD-ONLY0: if.end4667: +// SIMD-ONLY0-NEXT: [[TMP3399:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3400:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4668:%.*]] = icmp eq i64 [[TMP3399]], [[TMP3400]] +// SIMD-ONLY0-NEXT: [[CONV4669:%.*]] = zext i1 [[CMP4668]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4670:%.*]] = sext i32 [[CONV4669]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4670]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3401:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4671:%.*]] = icmp ne i64 [[TMP3401]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4671]], label [[IF_THEN4672:%.*]], label [[IF_ELSE4673:%.*]] +// SIMD-ONLY0: if.then4672: +// SIMD-ONLY0-NEXT: [[TMP3402:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3402]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4674:%.*]] +// SIMD-ONLY0: if.else4673: +// SIMD-ONLY0-NEXT: [[TMP3403:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3403]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4674]] +// SIMD-ONLY0: if.end4674: +// SIMD-ONLY0-NEXT: [[TMP3404:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3404]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3405:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3406:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4675:%.*]] = icmp ugt i64 [[TMP3405]], [[TMP3406]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4675]], label [[IF_THEN4677:%.*]], label [[IF_END4678:%.*]] +// SIMD-ONLY0: if.then4677: +// SIMD-ONLY0-NEXT: [[TMP3407:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3407]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4678]] +// SIMD-ONLY0: if.end4678: +// SIMD-ONLY0-NEXT: [[TMP3408:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3408]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3409:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3410:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4679:%.*]] = icmp ugt i64 [[TMP3409]], [[TMP3410]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4679]], label [[IF_THEN4681:%.*]], label [[IF_END4682:%.*]] +// SIMD-ONLY0: if.then4681: +// SIMD-ONLY0-NEXT: [[TMP3411:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3411]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4682]] +// SIMD-ONLY0: if.end4682: +// SIMD-ONLY0-NEXT: [[TMP3412:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3412]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3413:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3414:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4683:%.*]] = icmp ult i64 [[TMP3413]], [[TMP3414]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4683]], label [[IF_THEN4685:%.*]], label [[IF_END4686:%.*]] +// SIMD-ONLY0: if.then4685: +// SIMD-ONLY0-NEXT: [[TMP3415:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3415]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4686]] +// SIMD-ONLY0: if.end4686: +// SIMD-ONLY0-NEXT: [[TMP3416:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3416]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3417:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3418:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4687:%.*]] = icmp ult i64 [[TMP3417]], [[TMP3418]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4687]], label [[IF_THEN4689:%.*]], label [[IF_END4690:%.*]] +// SIMD-ONLY0: if.then4689: +// SIMD-ONLY0-NEXT: [[TMP3419:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: 
store i64 [[TMP3419]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4690]] +// SIMD-ONLY0: if.end4690: +// SIMD-ONLY0-NEXT: [[TMP3420:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3420]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3421:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3422:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4691:%.*]] = icmp eq i64 [[TMP3421]], [[TMP3422]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4691]], label [[IF_THEN4693:%.*]], label [[IF_END4694:%.*]] +// SIMD-ONLY0: if.then4693: +// SIMD-ONLY0-NEXT: [[TMP3423:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3423]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4694]] +// SIMD-ONLY0: if.end4694: +// SIMD-ONLY0-NEXT: [[TMP3424:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3424]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3425:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3426:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4695:%.*]] = icmp eq i64 [[TMP3425]], [[TMP3426]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4695]], label [[IF_THEN4697:%.*]], label [[IF_END4698:%.*]] +// SIMD-ONLY0: if.then4697: +// SIMD-ONLY0-NEXT: [[TMP3427:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3427]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4698]] +// SIMD-ONLY0: if.end4698: +// SIMD-ONLY0-NEXT: [[TMP3428:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3429:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4699:%.*]] = icmp ugt i64 [[TMP3428]], [[TMP3429]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4699]], label [[IF_THEN4701:%.*]], label [[IF_END4702:%.*]] +// SIMD-ONLY0: if.then4701: +// SIMD-ONLY0-NEXT: [[TMP3430:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3430]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4702]] +// 
SIMD-ONLY0: if.end4702: +// SIMD-ONLY0-NEXT: [[TMP3431:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3431]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3432:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3433:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4703:%.*]] = icmp ugt i64 [[TMP3432]], [[TMP3433]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4703]], label [[IF_THEN4705:%.*]], label [[IF_END4706:%.*]] +// SIMD-ONLY0: if.then4705: +// SIMD-ONLY0-NEXT: [[TMP3434:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3434]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4706]] +// SIMD-ONLY0: if.end4706: +// SIMD-ONLY0-NEXT: [[TMP3435:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3435]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3436:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3437:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4707:%.*]] = icmp ult i64 [[TMP3436]], [[TMP3437]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4707]], label [[IF_THEN4709:%.*]], label [[IF_END4710:%.*]] +// SIMD-ONLY0: if.then4709: +// SIMD-ONLY0-NEXT: [[TMP3438:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3438]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4710]] +// SIMD-ONLY0: if.end4710: +// SIMD-ONLY0-NEXT: [[TMP3439:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3439]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3440:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3441:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4711:%.*]] = icmp ult i64 [[TMP3440]], [[TMP3441]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4711]], label [[IF_THEN4713:%.*]], label [[IF_END4714:%.*]] +// SIMD-ONLY0: if.then4713: +// SIMD-ONLY0-NEXT: [[TMP3442:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3442]], ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END4714]] +// SIMD-ONLY0: if.end4714: +// SIMD-ONLY0-NEXT: [[TMP3443:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3443]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3444:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3445:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4715:%.*]] = icmp eq i64 [[TMP3444]], [[TMP3445]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4715]], label [[IF_THEN4717:%.*]], label [[IF_END4718:%.*]] +// SIMD-ONLY0: if.then4717: +// SIMD-ONLY0-NEXT: [[TMP3446:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3446]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4718]] +// SIMD-ONLY0: if.end4718: +// SIMD-ONLY0-NEXT: [[TMP3447:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3447]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3448:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3449:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4719:%.*]] = icmp eq i64 [[TMP3448]], [[TMP3449]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4719]], label [[IF_THEN4721:%.*]], label [[IF_END4722:%.*]] +// SIMD-ONLY0: if.then4721: +// SIMD-ONLY0-NEXT: [[TMP3450:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3450]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4722]] +// SIMD-ONLY0: if.end4722: +// SIMD-ONLY0-NEXT: [[TMP3451:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3451]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3452:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3453:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4723:%.*]] = icmp eq i64 [[TMP3452]], [[TMP3453]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4723]], label [[IF_THEN4725:%.*]], label [[IF_ELSE4726:%.*]] +// SIMD-ONLY0: if.then4725: +// SIMD-ONLY0-NEXT: [[TMP3454:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store 
i64 [[TMP3454]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4727:%.*]] +// SIMD-ONLY0: if.else4726: +// SIMD-ONLY0-NEXT: [[TMP3455:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3455]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4727]] +// SIMD-ONLY0: if.end4727: +// SIMD-ONLY0-NEXT: [[TMP3456:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3457:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4728:%.*]] = icmp eq i64 [[TMP3456]], [[TMP3457]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4728]], label [[IF_THEN4730:%.*]], label [[IF_ELSE4731:%.*]] +// SIMD-ONLY0: if.then4730: +// SIMD-ONLY0-NEXT: [[TMP3458:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3458]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4732:%.*]] +// SIMD-ONLY0: if.else4731: +// SIMD-ONLY0-NEXT: [[TMP3459:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3459]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4732]] +// SIMD-ONLY0: if.end4732: +// SIMD-ONLY0-NEXT: [[TMP3460:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3461:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4733:%.*]] = icmp eq i64 [[TMP3460]], [[TMP3461]] +// SIMD-ONLY0-NEXT: [[CONV4734:%.*]] = zext i1 [[CMP4733]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4735:%.*]] = sext i32 [[CONV4734]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4735]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3462:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4736:%.*]] = icmp ne i64 [[TMP3462]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4736]], label [[IF_THEN4737:%.*]], label [[IF_END4738:%.*]] +// SIMD-ONLY0: if.then4737: +// SIMD-ONLY0-NEXT: [[TMP3463:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3463]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4738]] +// SIMD-ONLY0: if.end4738: +// SIMD-ONLY0-NEXT: 
[[TMP3464:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3465:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4739:%.*]] = icmp eq i64 [[TMP3464]], [[TMP3465]] +// SIMD-ONLY0-NEXT: [[CONV4740:%.*]] = zext i1 [[CMP4739]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4741:%.*]] = sext i32 [[CONV4740]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4741]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3466:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4742:%.*]] = icmp ne i64 [[TMP3466]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4742]], label [[IF_THEN4743:%.*]], label [[IF_END4744:%.*]] +// SIMD-ONLY0: if.then4743: +// SIMD-ONLY0-NEXT: [[TMP3467:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3467]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4744]] +// SIMD-ONLY0: if.end4744: +// SIMD-ONLY0-NEXT: [[TMP3468:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3469:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4745:%.*]] = icmp eq i64 [[TMP3468]], [[TMP3469]] +// SIMD-ONLY0-NEXT: [[CONV4746:%.*]] = zext i1 [[CMP4745]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4747:%.*]] = sext i32 [[CONV4746]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4747]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3470:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4748:%.*]] = icmp ne i64 [[TMP3470]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4748]], label [[IF_THEN4749:%.*]], label [[IF_ELSE4750:%.*]] +// SIMD-ONLY0: if.then4749: +// SIMD-ONLY0-NEXT: [[TMP3471:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3471]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4751:%.*]] +// SIMD-ONLY0: if.else4750: +// SIMD-ONLY0-NEXT: [[TMP3472:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3472]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4751]] +// SIMD-ONLY0: if.end4751: +// SIMD-ONLY0-NEXT: 
[[TMP3473:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3474:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4752:%.*]] = icmp eq i64 [[TMP3473]], [[TMP3474]] +// SIMD-ONLY0-NEXT: [[CONV4753:%.*]] = zext i1 [[CMP4752]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4754:%.*]] = sext i32 [[CONV4753]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4754]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3475:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4755:%.*]] = icmp ne i64 [[TMP3475]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4755]], label [[IF_THEN4756:%.*]], label [[IF_ELSE4757:%.*]] +// SIMD-ONLY0: if.then4756: +// SIMD-ONLY0-NEXT: [[TMP3476:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3476]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4758:%.*]] +// SIMD-ONLY0: if.else4757: +// SIMD-ONLY0-NEXT: [[TMP3477:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3477]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4758]] +// SIMD-ONLY0: if.end4758: +// SIMD-ONLY0-NEXT: [[TMP3478:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3478]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3479:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3480:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4759:%.*]] = icmp ugt i64 [[TMP3479]], [[TMP3480]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4759]], label [[IF_THEN4761:%.*]], label [[IF_END4762:%.*]] +// SIMD-ONLY0: if.then4761: +// SIMD-ONLY0-NEXT: [[TMP3481:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3481]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4762]] +// SIMD-ONLY0: if.end4762: +// SIMD-ONLY0-NEXT: [[TMP3482:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3482]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3483:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3484:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4763:%.*]] = icmp ugt i64 [[TMP3483]], [[TMP3484]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4763]], label [[IF_THEN4765:%.*]], label [[IF_END4766:%.*]] +// SIMD-ONLY0: if.then4765: +// SIMD-ONLY0-NEXT: [[TMP3485:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3485]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4766]] +// SIMD-ONLY0: if.end4766: +// SIMD-ONLY0-NEXT: [[TMP3486:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3486]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3487:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3488:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4767:%.*]] = icmp ult i64 [[TMP3487]], [[TMP3488]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4767]], label [[IF_THEN4769:%.*]], label [[IF_END4770:%.*]] +// SIMD-ONLY0: if.then4769: +// SIMD-ONLY0-NEXT: [[TMP3489:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3489]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4770]] +// SIMD-ONLY0: if.end4770: +// SIMD-ONLY0-NEXT: [[TMP3490:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3490]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3491:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3492:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4771:%.*]] = icmp ult i64 [[TMP3491]], [[TMP3492]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4771]], label [[IF_THEN4773:%.*]], label [[IF_END4774:%.*]] +// SIMD-ONLY0: if.then4773: +// SIMD-ONLY0-NEXT: [[TMP3493:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3493]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4774]] +// SIMD-ONLY0: if.end4774: +// SIMD-ONLY0-NEXT: [[TMP3494:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3494]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3495:%.*]] = load 
i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3496:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4775:%.*]] = icmp eq i64 [[TMP3495]], [[TMP3496]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4775]], label [[IF_THEN4777:%.*]], label [[IF_END4778:%.*]] +// SIMD-ONLY0: if.then4777: +// SIMD-ONLY0-NEXT: [[TMP3497:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3497]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4778]] +// SIMD-ONLY0: if.end4778: +// SIMD-ONLY0-NEXT: [[TMP3498:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3498]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3499:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3500:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4779:%.*]] = icmp eq i64 [[TMP3499]], [[TMP3500]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4779]], label [[IF_THEN4781:%.*]], label [[IF_END4782:%.*]] +// SIMD-ONLY0: if.then4781: +// SIMD-ONLY0-NEXT: [[TMP3501:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3501]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4782]] +// SIMD-ONLY0: if.end4782: +// SIMD-ONLY0-NEXT: [[TMP3502:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3503:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4783:%.*]] = icmp ugt i64 [[TMP3502]], [[TMP3503]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4783]], label [[IF_THEN4785:%.*]], label [[IF_END4786:%.*]] +// SIMD-ONLY0: if.then4785: +// SIMD-ONLY0-NEXT: [[TMP3504:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3504]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4786]] +// SIMD-ONLY0: if.end4786: +// SIMD-ONLY0-NEXT: [[TMP3505:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3505]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3506:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3507:%.*]] = load i64, ptr [[ULE]], align 
8 +// SIMD-ONLY0-NEXT: [[CMP4787:%.*]] = icmp ugt i64 [[TMP3506]], [[TMP3507]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4787]], label [[IF_THEN4789:%.*]], label [[IF_END4790:%.*]] +// SIMD-ONLY0: if.then4789: +// SIMD-ONLY0-NEXT: [[TMP3508:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3508]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4790]] +// SIMD-ONLY0: if.end4790: +// SIMD-ONLY0-NEXT: [[TMP3509:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3509]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3510:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3511:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4791:%.*]] = icmp ult i64 [[TMP3510]], [[TMP3511]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4791]], label [[IF_THEN4793:%.*]], label [[IF_END4794:%.*]] +// SIMD-ONLY0: if.then4793: +// SIMD-ONLY0-NEXT: [[TMP3512:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3512]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4794]] +// SIMD-ONLY0: if.end4794: +// SIMD-ONLY0-NEXT: [[TMP3513:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3513]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3514:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3515:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4795:%.*]] = icmp ult i64 [[TMP3514]], [[TMP3515]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4795]], label [[IF_THEN4797:%.*]], label [[IF_END4798:%.*]] +// SIMD-ONLY0: if.then4797: +// SIMD-ONLY0-NEXT: [[TMP3516:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3516]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4798]] +// SIMD-ONLY0: if.end4798: +// SIMD-ONLY0-NEXT: [[TMP3517:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3517]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3518:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3519:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4799:%.*]] = icmp eq i64 [[TMP3518]], [[TMP3519]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4799]], label [[IF_THEN4801:%.*]], label [[IF_END4802:%.*]] +// SIMD-ONLY0: if.then4801: +// SIMD-ONLY0-NEXT: [[TMP3520:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3520]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4802]] +// SIMD-ONLY0: if.end4802: +// SIMD-ONLY0-NEXT: [[TMP3521:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3521]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3522:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3523:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4803:%.*]] = icmp eq i64 [[TMP3522]], [[TMP3523]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4803]], label [[IF_THEN4805:%.*]], label [[IF_END4806:%.*]] +// SIMD-ONLY0: if.then4805: +// SIMD-ONLY0-NEXT: [[TMP3524:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3524]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4806]] +// SIMD-ONLY0: if.end4806: +// SIMD-ONLY0-NEXT: [[TMP3525:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3525]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3526:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3527:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4807:%.*]] = icmp eq i64 [[TMP3526]], [[TMP3527]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4807]], label [[IF_THEN4809:%.*]], label [[IF_ELSE4810:%.*]] +// SIMD-ONLY0: if.then4809: +// SIMD-ONLY0-NEXT: [[TMP3528:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3528]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4811:%.*]] +// SIMD-ONLY0: if.else4810: +// SIMD-ONLY0-NEXT: [[TMP3529:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3529]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4811]] 
+// SIMD-ONLY0: if.end4811: +// SIMD-ONLY0-NEXT: [[TMP3530:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3531:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4812:%.*]] = icmp eq i64 [[TMP3530]], [[TMP3531]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4812]], label [[IF_THEN4814:%.*]], label [[IF_ELSE4815:%.*]] +// SIMD-ONLY0: if.then4814: +// SIMD-ONLY0-NEXT: [[TMP3532:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3532]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4816:%.*]] +// SIMD-ONLY0: if.else4815: +// SIMD-ONLY0-NEXT: [[TMP3533:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3533]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4816]] +// SIMD-ONLY0: if.end4816: +// SIMD-ONLY0-NEXT: [[TMP3534:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3535:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4817:%.*]] = icmp eq i64 [[TMP3534]], [[TMP3535]] +// SIMD-ONLY0-NEXT: [[CONV4818:%.*]] = zext i1 [[CMP4817]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4819:%.*]] = sext i32 [[CONV4818]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4819]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3536:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4820:%.*]] = icmp ne i64 [[TMP3536]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4820]], label [[IF_THEN4821:%.*]], label [[IF_END4822:%.*]] +// SIMD-ONLY0: if.then4821: +// SIMD-ONLY0-NEXT: [[TMP3537:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3537]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4822]] +// SIMD-ONLY0: if.end4822: +// SIMD-ONLY0-NEXT: [[TMP3538:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3539:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4823:%.*]] = icmp eq i64 [[TMP3538]], [[TMP3539]] +// SIMD-ONLY0-NEXT: [[CONV4824:%.*]] = zext i1 [[CMP4823]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4825:%.*]] = 
sext i32 [[CONV4824]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4825]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3540:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4826:%.*]] = icmp ne i64 [[TMP3540]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4826]], label [[IF_THEN4827:%.*]], label [[IF_END4828:%.*]] +// SIMD-ONLY0: if.then4827: +// SIMD-ONLY0-NEXT: [[TMP3541:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3541]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4828]] +// SIMD-ONLY0: if.end4828: +// SIMD-ONLY0-NEXT: [[TMP3542:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3543:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4829:%.*]] = icmp eq i64 [[TMP3542]], [[TMP3543]] +// SIMD-ONLY0-NEXT: [[CONV4830:%.*]] = zext i1 [[CMP4829]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4831:%.*]] = sext i32 [[CONV4830]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4831]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3544:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4832:%.*]] = icmp ne i64 [[TMP3544]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4832]], label [[IF_THEN4833:%.*]], label [[IF_ELSE4834:%.*]] +// SIMD-ONLY0: if.then4833: +// SIMD-ONLY0-NEXT: [[TMP3545:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3545]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4835:%.*]] +// SIMD-ONLY0: if.else4834: +// SIMD-ONLY0-NEXT: [[TMP3546:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3546]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4835]] +// SIMD-ONLY0: if.end4835: +// SIMD-ONLY0-NEXT: [[TMP3547:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3548:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4836:%.*]] = icmp eq i64 [[TMP3547]], [[TMP3548]] +// SIMD-ONLY0-NEXT: [[CONV4837:%.*]] = zext i1 [[CMP4836]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4838:%.*]] = sext 
i32 [[CONV4837]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4838]], ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3549:%.*]] = load i64, ptr [[ULR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4839:%.*]] = icmp ne i64 [[TMP3549]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4839]], label [[IF_THEN4840:%.*]], label [[IF_ELSE4841:%.*]] +// SIMD-ONLY0: if.then4840: +// SIMD-ONLY0-NEXT: [[TMP3550:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3550]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4842:%.*]] +// SIMD-ONLY0: if.else4841: +// SIMD-ONLY0-NEXT: [[TMP3551:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3551]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4842]] +// SIMD-ONLY0: if.end4842: +// SIMD-ONLY0-NEXT: [[TMP3552:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3552]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3553:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3554:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4843:%.*]] = icmp sgt i64 [[TMP3553]], [[TMP3554]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4843]], label [[IF_THEN4845:%.*]], label [[IF_END4846:%.*]] +// SIMD-ONLY0: if.then4845: +// SIMD-ONLY0-NEXT: [[TMP3555:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3555]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4846]] +// SIMD-ONLY0: if.end4846: +// SIMD-ONLY0-NEXT: [[TMP3556:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3556]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3557:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3558:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4847:%.*]] = icmp sgt i64 [[TMP3557]], [[TMP3558]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4847]], label [[IF_THEN4849:%.*]], label [[IF_END4850:%.*]] +// SIMD-ONLY0: if.then4849: +// SIMD-ONLY0-NEXT: [[TMP3559:%.*]] = load i64, ptr [[LLE]], align 8 
+// SIMD-ONLY0-NEXT: store i64 [[TMP3559]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4850]] +// SIMD-ONLY0: if.end4850: +// SIMD-ONLY0-NEXT: [[TMP3560:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3560]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3561:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3562:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4851:%.*]] = icmp slt i64 [[TMP3561]], [[TMP3562]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4851]], label [[IF_THEN4853:%.*]], label [[IF_END4854:%.*]] +// SIMD-ONLY0: if.then4853: +// SIMD-ONLY0-NEXT: [[TMP3563:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3563]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4854]] +// SIMD-ONLY0: if.end4854: +// SIMD-ONLY0-NEXT: [[TMP3564:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3564]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3565:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3566:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4855:%.*]] = icmp slt i64 [[TMP3565]], [[TMP3566]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4855]], label [[IF_THEN4857:%.*]], label [[IF_END4858:%.*]] +// SIMD-ONLY0: if.then4857: +// SIMD-ONLY0-NEXT: [[TMP3567:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3567]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4858]] +// SIMD-ONLY0: if.end4858: +// SIMD-ONLY0-NEXT: [[TMP3568:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3568]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3569:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3570:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4859:%.*]] = icmp eq i64 [[TMP3569]], [[TMP3570]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4859]], label [[IF_THEN4861:%.*]], label [[IF_END4862:%.*]] +// SIMD-ONLY0: if.then4861: +// SIMD-ONLY0-NEXT: 
[[TMP3571:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3571]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4862]] +// SIMD-ONLY0: if.end4862: +// SIMD-ONLY0-NEXT: [[TMP3572:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3572]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3573:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3574:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4863:%.*]] = icmp eq i64 [[TMP3573]], [[TMP3574]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4863]], label [[IF_THEN4865:%.*]], label [[IF_END4866:%.*]] +// SIMD-ONLY0: if.then4865: +// SIMD-ONLY0-NEXT: [[TMP3575:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3575]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4866]] +// SIMD-ONLY0: if.end4866: +// SIMD-ONLY0-NEXT: [[TMP3576:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3577:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4867:%.*]] = icmp sgt i64 [[TMP3576]], [[TMP3577]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4867]], label [[IF_THEN4869:%.*]], label [[IF_END4870:%.*]] +// SIMD-ONLY0: if.then4869: +// SIMD-ONLY0-NEXT: [[TMP3578:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3578]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4870]] +// SIMD-ONLY0: if.end4870: +// SIMD-ONLY0-NEXT: [[TMP3579:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3579]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3580:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3581:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4871:%.*]] = icmp sgt i64 [[TMP3580]], [[TMP3581]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4871]], label [[IF_THEN4873:%.*]], label [[IF_END4874:%.*]] +// SIMD-ONLY0: if.then4873: +// SIMD-ONLY0-NEXT: [[TMP3582:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3582]], 
ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4874]] +// SIMD-ONLY0: if.end4874: +// SIMD-ONLY0-NEXT: [[TMP3583:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3583]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3584:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3585:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4875:%.*]] = icmp slt i64 [[TMP3584]], [[TMP3585]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4875]], label [[IF_THEN4877:%.*]], label [[IF_END4878:%.*]] +// SIMD-ONLY0: if.then4877: +// SIMD-ONLY0-NEXT: [[TMP3586:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3586]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4878]] +// SIMD-ONLY0: if.end4878: +// SIMD-ONLY0-NEXT: [[TMP3587:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3587]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3588:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3589:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4879:%.*]] = icmp slt i64 [[TMP3588]], [[TMP3589]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4879]], label [[IF_THEN4881:%.*]], label [[IF_END4882:%.*]] +// SIMD-ONLY0: if.then4881: +// SIMD-ONLY0-NEXT: [[TMP3590:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3590]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4882]] +// SIMD-ONLY0: if.end4882: +// SIMD-ONLY0-NEXT: [[TMP3591:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3591]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3592:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3593:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4883:%.*]] = icmp eq i64 [[TMP3592]], [[TMP3593]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4883]], label [[IF_THEN4885:%.*]], label [[IF_END4886:%.*]] +// SIMD-ONLY0: if.then4885: +// SIMD-ONLY0-NEXT: [[TMP3594:%.*]] = load i64, ptr [[LLD]], align 8 
+// SIMD-ONLY0-NEXT: store i64 [[TMP3594]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4886]] +// SIMD-ONLY0: if.end4886: +// SIMD-ONLY0-NEXT: [[TMP3595:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3595]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3596:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3597:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4887:%.*]] = icmp eq i64 [[TMP3596]], [[TMP3597]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4887]], label [[IF_THEN4889:%.*]], label [[IF_END4890:%.*]] +// SIMD-ONLY0: if.then4889: +// SIMD-ONLY0-NEXT: [[TMP3598:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3598]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4890]] +// SIMD-ONLY0: if.end4890: +// SIMD-ONLY0-NEXT: [[TMP3599:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3599]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3600:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3601:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4891:%.*]] = icmp eq i64 [[TMP3600]], [[TMP3601]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4891]], label [[IF_THEN4893:%.*]], label [[IF_ELSE4894:%.*]] +// SIMD-ONLY0: if.then4893: +// SIMD-ONLY0-NEXT: [[TMP3602:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3602]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4895:%.*]] +// SIMD-ONLY0: if.else4894: +// SIMD-ONLY0-NEXT: [[TMP3603:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3603]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4895]] +// SIMD-ONLY0: if.end4895: +// SIMD-ONLY0-NEXT: [[TMP3604:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3605:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4896:%.*]] = icmp eq i64 [[TMP3604]], [[TMP3605]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4896]], label [[IF_THEN4898:%.*]], 
label [[IF_ELSE4899:%.*]] +// SIMD-ONLY0: if.then4898: +// SIMD-ONLY0-NEXT: [[TMP3606:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3606]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4900:%.*]] +// SIMD-ONLY0: if.else4899: +// SIMD-ONLY0-NEXT: [[TMP3607:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3607]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4900]] +// SIMD-ONLY0: if.end4900: +// SIMD-ONLY0-NEXT: [[TMP3608:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3609:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4901:%.*]] = icmp eq i64 [[TMP3608]], [[TMP3609]] +// SIMD-ONLY0-NEXT: [[CONV4902:%.*]] = zext i1 [[CMP4901]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4903:%.*]] = sext i32 [[CONV4902]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4903]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3610:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4904:%.*]] = icmp ne i64 [[TMP3610]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4904]], label [[IF_THEN4905:%.*]], label [[IF_END4906:%.*]] +// SIMD-ONLY0: if.then4905: +// SIMD-ONLY0-NEXT: [[TMP3611:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3611]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4906]] +// SIMD-ONLY0: if.end4906: +// SIMD-ONLY0-NEXT: [[TMP3612:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3613:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4907:%.*]] = icmp eq i64 [[TMP3612]], [[TMP3613]] +// SIMD-ONLY0-NEXT: [[CONV4908:%.*]] = zext i1 [[CMP4907]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4909:%.*]] = sext i32 [[CONV4908]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4909]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3614:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4910:%.*]] = icmp ne i64 [[TMP3614]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4910]], label [[IF_THEN4911:%.*]], label 
[[IF_END4912:%.*]] +// SIMD-ONLY0: if.then4911: +// SIMD-ONLY0-NEXT: [[TMP3615:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3615]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4912]] +// SIMD-ONLY0: if.end4912: +// SIMD-ONLY0-NEXT: [[TMP3616:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3617:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4913:%.*]] = icmp eq i64 [[TMP3616]], [[TMP3617]] +// SIMD-ONLY0-NEXT: [[CONV4914:%.*]] = zext i1 [[CMP4913]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4915:%.*]] = sext i32 [[CONV4914]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4915]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3618:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4916:%.*]] = icmp ne i64 [[TMP3618]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4916]], label [[IF_THEN4917:%.*]], label [[IF_ELSE4918:%.*]] +// SIMD-ONLY0: if.then4917: +// SIMD-ONLY0-NEXT: [[TMP3619:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3619]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4919:%.*]] +// SIMD-ONLY0: if.else4918: +// SIMD-ONLY0-NEXT: [[TMP3620:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3620]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4919]] +// SIMD-ONLY0: if.end4919: +// SIMD-ONLY0-NEXT: [[TMP3621:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3622:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4920:%.*]] = icmp eq i64 [[TMP3621]], [[TMP3622]] +// SIMD-ONLY0-NEXT: [[CONV4921:%.*]] = zext i1 [[CMP4920]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4922:%.*]] = sext i32 [[CONV4921]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4922]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3623:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4923:%.*]] = icmp ne i64 [[TMP3623]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4923]], label [[IF_THEN4924:%.*]], label 
[[IF_ELSE4925:%.*]] +// SIMD-ONLY0: if.then4924: +// SIMD-ONLY0-NEXT: [[TMP3624:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3624]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4926:%.*]] +// SIMD-ONLY0: if.else4925: +// SIMD-ONLY0-NEXT: [[TMP3625:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3625]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4926]] +// SIMD-ONLY0: if.end4926: +// SIMD-ONLY0-NEXT: [[TMP3626:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3626]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3627:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3628:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4927:%.*]] = icmp sgt i64 [[TMP3627]], [[TMP3628]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4927]], label [[IF_THEN4929:%.*]], label [[IF_END4930:%.*]] +// SIMD-ONLY0: if.then4929: +// SIMD-ONLY0-NEXT: [[TMP3629:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3629]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4930]] +// SIMD-ONLY0: if.end4930: +// SIMD-ONLY0-NEXT: [[TMP3630:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3630]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3631:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3632:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4931:%.*]] = icmp sgt i64 [[TMP3631]], [[TMP3632]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4931]], label [[IF_THEN4933:%.*]], label [[IF_END4934:%.*]] +// SIMD-ONLY0: if.then4933: +// SIMD-ONLY0-NEXT: [[TMP3633:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3633]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4934]] +// SIMD-ONLY0: if.end4934: +// SIMD-ONLY0-NEXT: [[TMP3634:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3634]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3635:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3636:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4935:%.*]] = icmp slt i64 [[TMP3635]], [[TMP3636]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4935]], label [[IF_THEN4937:%.*]], label [[IF_END4938:%.*]] +// SIMD-ONLY0: if.then4937: +// SIMD-ONLY0-NEXT: [[TMP3637:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3637]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4938]] +// SIMD-ONLY0: if.end4938: +// SIMD-ONLY0-NEXT: [[TMP3638:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3638]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3639:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3640:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4939:%.*]] = icmp slt i64 [[TMP3639]], [[TMP3640]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4939]], label [[IF_THEN4941:%.*]], label [[IF_END4942:%.*]] +// SIMD-ONLY0: if.then4941: +// SIMD-ONLY0-NEXT: [[TMP3641:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3641]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4942]] +// SIMD-ONLY0: if.end4942: +// SIMD-ONLY0-NEXT: [[TMP3642:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3642]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3643:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3644:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4943:%.*]] = icmp eq i64 [[TMP3643]], [[TMP3644]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4943]], label [[IF_THEN4945:%.*]], label [[IF_END4946:%.*]] +// SIMD-ONLY0: if.then4945: +// SIMD-ONLY0-NEXT: [[TMP3645:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3645]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4946]] +// SIMD-ONLY0: if.end4946: +// SIMD-ONLY0-NEXT: [[TMP3646:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3646]], 
ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3647:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3648:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4947:%.*]] = icmp eq i64 [[TMP3647]], [[TMP3648]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4947]], label [[IF_THEN4949:%.*]], label [[IF_END4950:%.*]] +// SIMD-ONLY0: if.then4949: +// SIMD-ONLY0-NEXT: [[TMP3649:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3649]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4950]] +// SIMD-ONLY0: if.end4950: +// SIMD-ONLY0-NEXT: [[TMP3650:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3651:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4951:%.*]] = icmp sgt i64 [[TMP3650]], [[TMP3651]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4951]], label [[IF_THEN4953:%.*]], label [[IF_END4954:%.*]] +// SIMD-ONLY0: if.then4953: +// SIMD-ONLY0-NEXT: [[TMP3652:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3652]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4954]] +// SIMD-ONLY0: if.end4954: +// SIMD-ONLY0-NEXT: [[TMP3653:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3653]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3654:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3655:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4955:%.*]] = icmp sgt i64 [[TMP3654]], [[TMP3655]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4955]], label [[IF_THEN4957:%.*]], label [[IF_END4958:%.*]] +// SIMD-ONLY0: if.then4957: +// SIMD-ONLY0-NEXT: [[TMP3656:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3656]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4958]] +// SIMD-ONLY0: if.end4958: +// SIMD-ONLY0-NEXT: [[TMP3657:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3657]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3658:%.*]] = load i64, ptr [[LLE]], align 8 
+// SIMD-ONLY0-NEXT: [[TMP3659:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4959:%.*]] = icmp slt i64 [[TMP3658]], [[TMP3659]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4959]], label [[IF_THEN4961:%.*]], label [[IF_END4962:%.*]] +// SIMD-ONLY0: if.then4961: +// SIMD-ONLY0-NEXT: [[TMP3660:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3660]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4962]] +// SIMD-ONLY0: if.end4962: +// SIMD-ONLY0-NEXT: [[TMP3661:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3661]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3662:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3663:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4963:%.*]] = icmp slt i64 [[TMP3662]], [[TMP3663]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4963]], label [[IF_THEN4965:%.*]], label [[IF_END4966:%.*]] +// SIMD-ONLY0: if.then4965: +// SIMD-ONLY0-NEXT: [[TMP3664:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3664]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4966]] +// SIMD-ONLY0: if.end4966: +// SIMD-ONLY0-NEXT: [[TMP3665:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3665]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3666:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3667:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4967:%.*]] = icmp eq i64 [[TMP3666]], [[TMP3667]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4967]], label [[IF_THEN4969:%.*]], label [[IF_END4970:%.*]] +// SIMD-ONLY0: if.then4969: +// SIMD-ONLY0-NEXT: [[TMP3668:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3668]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4970]] +// SIMD-ONLY0: if.end4970: +// SIMD-ONLY0-NEXT: [[TMP3669:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3669]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3670:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3671:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4971:%.*]] = icmp eq i64 [[TMP3670]], [[TMP3671]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4971]], label [[IF_THEN4973:%.*]], label [[IF_END4974:%.*]] +// SIMD-ONLY0: if.then4973: +// SIMD-ONLY0-NEXT: [[TMP3672:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3672]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4974]] +// SIMD-ONLY0: if.end4974: +// SIMD-ONLY0-NEXT: [[TMP3673:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3673]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3674:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3675:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4975:%.*]] = icmp eq i64 [[TMP3674]], [[TMP3675]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4975]], label [[IF_THEN4977:%.*]], label [[IF_ELSE4978:%.*]] +// SIMD-ONLY0: if.then4977: +// SIMD-ONLY0-NEXT: [[TMP3676:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3676]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4979:%.*]] +// SIMD-ONLY0: if.else4978: +// SIMD-ONLY0-NEXT: [[TMP3677:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3677]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4979]] +// SIMD-ONLY0: if.end4979: +// SIMD-ONLY0-NEXT: [[TMP3678:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3679:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4980:%.*]] = icmp eq i64 [[TMP3678]], [[TMP3679]] +// SIMD-ONLY0-NEXT: br i1 [[CMP4980]], label [[IF_THEN4982:%.*]], label [[IF_ELSE4983:%.*]] +// SIMD-ONLY0: if.then4982: +// SIMD-ONLY0-NEXT: [[TMP3680:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3680]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4984:%.*]] +// SIMD-ONLY0: if.else4983: +// SIMD-ONLY0-NEXT: 
[[TMP3681:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3681]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4984]] +// SIMD-ONLY0: if.end4984: +// SIMD-ONLY0-NEXT: [[TMP3682:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3683:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4985:%.*]] = icmp eq i64 [[TMP3682]], [[TMP3683]] +// SIMD-ONLY0-NEXT: [[CONV4986:%.*]] = zext i1 [[CMP4985]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4987:%.*]] = sext i32 [[CONV4986]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4987]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3684:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4988:%.*]] = icmp ne i64 [[TMP3684]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4988]], label [[IF_THEN4989:%.*]], label [[IF_END4990:%.*]] +// SIMD-ONLY0: if.then4989: +// SIMD-ONLY0-NEXT: [[TMP3685:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3685]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4990]] +// SIMD-ONLY0: if.end4990: +// SIMD-ONLY0-NEXT: [[TMP3686:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3687:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4991:%.*]] = icmp eq i64 [[TMP3686]], [[TMP3687]] +// SIMD-ONLY0-NEXT: [[CONV4992:%.*]] = zext i1 [[CMP4991]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4993:%.*]] = sext i32 [[CONV4992]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4993]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3688:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL4994:%.*]] = icmp ne i64 [[TMP3688]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL4994]], label [[IF_THEN4995:%.*]], label [[IF_END4996:%.*]] +// SIMD-ONLY0: if.then4995: +// SIMD-ONLY0-NEXT: [[TMP3689:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3689]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END4996]] +// SIMD-ONLY0: if.end4996: +// SIMD-ONLY0-NEXT: 
[[TMP3690:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3691:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP4997:%.*]] = icmp eq i64 [[TMP3690]], [[TMP3691]] +// SIMD-ONLY0-NEXT: [[CONV4998:%.*]] = zext i1 [[CMP4997]] to i32 +// SIMD-ONLY0-NEXT: [[CONV4999:%.*]] = sext i32 [[CONV4998]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV4999]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3692:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5000:%.*]] = icmp ne i64 [[TMP3692]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5000]], label [[IF_THEN5001:%.*]], label [[IF_ELSE5002:%.*]] +// SIMD-ONLY0: if.then5001: +// SIMD-ONLY0-NEXT: [[TMP3693:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3693]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5003:%.*]] +// SIMD-ONLY0: if.else5002: +// SIMD-ONLY0-NEXT: [[TMP3694:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3694]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5003]] +// SIMD-ONLY0: if.end5003: +// SIMD-ONLY0-NEXT: [[TMP3695:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3696:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5004:%.*]] = icmp eq i64 [[TMP3695]], [[TMP3696]] +// SIMD-ONLY0-NEXT: [[CONV5005:%.*]] = zext i1 [[CMP5004]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5006:%.*]] = sext i32 [[CONV5005]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5006]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3697:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5007:%.*]] = icmp ne i64 [[TMP3697]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5007]], label [[IF_THEN5008:%.*]], label [[IF_ELSE5009:%.*]] +// SIMD-ONLY0: if.then5008: +// SIMD-ONLY0-NEXT: [[TMP3698:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3698]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5010:%.*]] +// SIMD-ONLY0: if.else5009: +// SIMD-ONLY0-NEXT: 
[[TMP3699:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3699]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5010]] +// SIMD-ONLY0: if.end5010: +// SIMD-ONLY0-NEXT: [[TMP3700:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3700]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3701:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3702:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5011:%.*]] = icmp sgt i64 [[TMP3701]], [[TMP3702]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5011]], label [[IF_THEN5013:%.*]], label [[IF_END5014:%.*]] +// SIMD-ONLY0: if.then5013: +// SIMD-ONLY0-NEXT: [[TMP3703:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3703]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5014]] +// SIMD-ONLY0: if.end5014: +// SIMD-ONLY0-NEXT: [[TMP3704:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3704]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3705:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3706:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5015:%.*]] = icmp sgt i64 [[TMP3705]], [[TMP3706]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5015]], label [[IF_THEN5017:%.*]], label [[IF_END5018:%.*]] +// SIMD-ONLY0: if.then5017: +// SIMD-ONLY0-NEXT: [[TMP3707:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3707]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5018]] +// SIMD-ONLY0: if.end5018: +// SIMD-ONLY0-NEXT: [[TMP3708:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3708]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3709:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3710:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5019:%.*]] = icmp slt i64 [[TMP3709]], [[TMP3710]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5019]], label [[IF_THEN5021:%.*]], label [[IF_END5022:%.*]] +// 
SIMD-ONLY0: if.then5021: +// SIMD-ONLY0-NEXT: [[TMP3711:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3711]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5022]] +// SIMD-ONLY0: if.end5022: +// SIMD-ONLY0-NEXT: [[TMP3712:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3712]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3713:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3714:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5023:%.*]] = icmp slt i64 [[TMP3713]], [[TMP3714]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5023]], label [[IF_THEN5025:%.*]], label [[IF_END5026:%.*]] +// SIMD-ONLY0: if.then5025: +// SIMD-ONLY0-NEXT: [[TMP3715:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3715]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5026]] +// SIMD-ONLY0: if.end5026: +// SIMD-ONLY0-NEXT: [[TMP3716:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3716]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3717:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3718:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5027:%.*]] = icmp eq i64 [[TMP3717]], [[TMP3718]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5027]], label [[IF_THEN5029:%.*]], label [[IF_END5030:%.*]] +// SIMD-ONLY0: if.then5029: +// SIMD-ONLY0-NEXT: [[TMP3719:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3719]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5030]] +// SIMD-ONLY0: if.end5030: +// SIMD-ONLY0-NEXT: [[TMP3720:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3720]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3721:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3722:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5031:%.*]] = icmp eq i64 [[TMP3721]], [[TMP3722]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5031]], label 
[[IF_THEN5033:%.*]], label [[IF_END5034:%.*]] +// SIMD-ONLY0: if.then5033: +// SIMD-ONLY0-NEXT: [[TMP3723:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3723]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5034]] +// SIMD-ONLY0: if.end5034: +// SIMD-ONLY0-NEXT: [[TMP3724:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3725:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5035:%.*]] = icmp sgt i64 [[TMP3724]], [[TMP3725]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5035]], label [[IF_THEN5037:%.*]], label [[IF_END5038:%.*]] +// SIMD-ONLY0: if.then5037: +// SIMD-ONLY0-NEXT: [[TMP3726:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3726]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5038]] +// SIMD-ONLY0: if.end5038: +// SIMD-ONLY0-NEXT: [[TMP3727:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3727]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3728:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3729:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5039:%.*]] = icmp sgt i64 [[TMP3728]], [[TMP3729]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5039]], label [[IF_THEN5041:%.*]], label [[IF_END5042:%.*]] +// SIMD-ONLY0: if.then5041: +// SIMD-ONLY0-NEXT: [[TMP3730:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3730]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5042]] +// SIMD-ONLY0: if.end5042: +// SIMD-ONLY0-NEXT: [[TMP3731:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3731]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3732:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3733:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5043:%.*]] = icmp slt i64 [[TMP3732]], [[TMP3733]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5043]], label [[IF_THEN5045:%.*]], label [[IF_END5046:%.*]] +// SIMD-ONLY0: if.then5045: +// 
SIMD-ONLY0-NEXT: [[TMP3734:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3734]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5046]] +// SIMD-ONLY0: if.end5046: +// SIMD-ONLY0-NEXT: [[TMP3735:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3735]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3736:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3737:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5047:%.*]] = icmp slt i64 [[TMP3736]], [[TMP3737]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5047]], label [[IF_THEN5049:%.*]], label [[IF_END5050:%.*]] +// SIMD-ONLY0: if.then5049: +// SIMD-ONLY0-NEXT: [[TMP3738:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3738]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5050]] +// SIMD-ONLY0: if.end5050: +// SIMD-ONLY0-NEXT: [[TMP3739:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3739]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3740:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3741:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5051:%.*]] = icmp eq i64 [[TMP3740]], [[TMP3741]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5051]], label [[IF_THEN5053:%.*]], label [[IF_END5054:%.*]] +// SIMD-ONLY0: if.then5053: +// SIMD-ONLY0-NEXT: [[TMP3742:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3742]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5054]] +// SIMD-ONLY0: if.end5054: +// SIMD-ONLY0-NEXT: [[TMP3743:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3743]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3744:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3745:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5055:%.*]] = icmp eq i64 [[TMP3744]], [[TMP3745]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5055]], label [[IF_THEN5057:%.*]], label 
[[IF_END5058:%.*]] +// SIMD-ONLY0: if.then5057: +// SIMD-ONLY0-NEXT: [[TMP3746:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3746]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5058]] +// SIMD-ONLY0: if.end5058: +// SIMD-ONLY0-NEXT: [[TMP3747:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3747]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3748:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3749:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5059:%.*]] = icmp eq i64 [[TMP3748]], [[TMP3749]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5059]], label [[IF_THEN5061:%.*]], label [[IF_ELSE5062:%.*]] +// SIMD-ONLY0: if.then5061: +// SIMD-ONLY0-NEXT: [[TMP3750:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3750]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5063:%.*]] +// SIMD-ONLY0: if.else5062: +// SIMD-ONLY0-NEXT: [[TMP3751:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3751]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5063]] +// SIMD-ONLY0: if.end5063: +// SIMD-ONLY0-NEXT: [[TMP3752:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3753:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5064:%.*]] = icmp eq i64 [[TMP3752]], [[TMP3753]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5064]], label [[IF_THEN5066:%.*]], label [[IF_ELSE5067:%.*]] +// SIMD-ONLY0: if.then5066: +// SIMD-ONLY0-NEXT: [[TMP3754:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3754]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5068:%.*]] +// SIMD-ONLY0: if.else5067: +// SIMD-ONLY0-NEXT: [[TMP3755:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3755]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5068]] +// SIMD-ONLY0: if.end5068: +// SIMD-ONLY0-NEXT: [[TMP3756:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP3757:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5069:%.*]] = icmp eq i64 [[TMP3756]], [[TMP3757]] +// SIMD-ONLY0-NEXT: [[CONV5070:%.*]] = zext i1 [[CMP5069]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5071:%.*]] = sext i32 [[CONV5070]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5071]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3758:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5072:%.*]] = icmp ne i64 [[TMP3758]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5072]], label [[IF_THEN5073:%.*]], label [[IF_END5074:%.*]] +// SIMD-ONLY0: if.then5073: +// SIMD-ONLY0-NEXT: [[TMP3759:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3759]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5074]] +// SIMD-ONLY0: if.end5074: +// SIMD-ONLY0-NEXT: [[TMP3760:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3761:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5075:%.*]] = icmp eq i64 [[TMP3760]], [[TMP3761]] +// SIMD-ONLY0-NEXT: [[CONV5076:%.*]] = zext i1 [[CMP5075]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5077:%.*]] = sext i32 [[CONV5076]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5077]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3762:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5078:%.*]] = icmp ne i64 [[TMP3762]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5078]], label [[IF_THEN5079:%.*]], label [[IF_END5080:%.*]] +// SIMD-ONLY0: if.then5079: +// SIMD-ONLY0-NEXT: [[TMP3763:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3763]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5080]] +// SIMD-ONLY0: if.end5080: +// SIMD-ONLY0-NEXT: [[TMP3764:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3765:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5081:%.*]] = icmp eq i64 [[TMP3764]], [[TMP3765]] +// SIMD-ONLY0-NEXT: [[CONV5082:%.*]] = zext i1 [[CMP5081]] to i32 +// SIMD-ONLY0-NEXT: 
[[CONV5083:%.*]] = sext i32 [[CONV5082]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5083]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3766:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5084:%.*]] = icmp ne i64 [[TMP3766]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5084]], label [[IF_THEN5085:%.*]], label [[IF_ELSE5086:%.*]] +// SIMD-ONLY0: if.then5085: +// SIMD-ONLY0-NEXT: [[TMP3767:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3767]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5087:%.*]] +// SIMD-ONLY0: if.else5086: +// SIMD-ONLY0-NEXT: [[TMP3768:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3768]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5087]] +// SIMD-ONLY0: if.end5087: +// SIMD-ONLY0-NEXT: [[TMP3769:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3770:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5088:%.*]] = icmp eq i64 [[TMP3769]], [[TMP3770]] +// SIMD-ONLY0-NEXT: [[CONV5089:%.*]] = zext i1 [[CMP5088]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5090:%.*]] = sext i32 [[CONV5089]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5090]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3771:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5091:%.*]] = icmp ne i64 [[TMP3771]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5091]], label [[IF_THEN5092:%.*]], label [[IF_ELSE5093:%.*]] +// SIMD-ONLY0: if.then5092: +// SIMD-ONLY0-NEXT: [[TMP3772:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3772]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5094:%.*]] +// SIMD-ONLY0: if.else5093: +// SIMD-ONLY0-NEXT: [[TMP3773:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3773]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5094]] +// SIMD-ONLY0: if.end5094: +// SIMD-ONLY0-NEXT: [[TMP3774:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store 
i64 [[TMP3774]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3775:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3776:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5095:%.*]] = icmp sgt i64 [[TMP3775]], [[TMP3776]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5095]], label [[IF_THEN5097:%.*]], label [[IF_END5098:%.*]] +// SIMD-ONLY0: if.then5097: +// SIMD-ONLY0-NEXT: [[TMP3777:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3777]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5098]] +// SIMD-ONLY0: if.end5098: +// SIMD-ONLY0-NEXT: [[TMP3778:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3778]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3779:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3780:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5099:%.*]] = icmp sgt i64 [[TMP3779]], [[TMP3780]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5099]], label [[IF_THEN5101:%.*]], label [[IF_END5102:%.*]] +// SIMD-ONLY0: if.then5101: +// SIMD-ONLY0-NEXT: [[TMP3781:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3781]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5102]] +// SIMD-ONLY0: if.end5102: +// SIMD-ONLY0-NEXT: [[TMP3782:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3782]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3783:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3784:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5103:%.*]] = icmp slt i64 [[TMP3783]], [[TMP3784]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5103]], label [[IF_THEN5105:%.*]], label [[IF_END5106:%.*]] +// SIMD-ONLY0: if.then5105: +// SIMD-ONLY0-NEXT: [[TMP3785:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3785]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5106]] +// SIMD-ONLY0: if.end5106: +// SIMD-ONLY0-NEXT: [[TMP3786:%.*]] = load i64, ptr 
[[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3786]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3787:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3788:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5107:%.*]] = icmp slt i64 [[TMP3787]], [[TMP3788]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5107]], label [[IF_THEN5109:%.*]], label [[IF_END5110:%.*]] +// SIMD-ONLY0: if.then5109: +// SIMD-ONLY0-NEXT: [[TMP3789:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3789]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5110]] +// SIMD-ONLY0: if.end5110: +// SIMD-ONLY0-NEXT: [[TMP3790:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3790]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3791:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3792:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5111:%.*]] = icmp eq i64 [[TMP3791]], [[TMP3792]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5111]], label [[IF_THEN5113:%.*]], label [[IF_END5114:%.*]] +// SIMD-ONLY0: if.then5113: +// SIMD-ONLY0-NEXT: [[TMP3793:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3793]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5114]] +// SIMD-ONLY0: if.end5114: +// SIMD-ONLY0-NEXT: [[TMP3794:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3794]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3795:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3796:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5115:%.*]] = icmp eq i64 [[TMP3795]], [[TMP3796]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5115]], label [[IF_THEN5117:%.*]], label [[IF_END5118:%.*]] +// SIMD-ONLY0: if.then5117: +// SIMD-ONLY0-NEXT: [[TMP3797:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3797]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5118]] +// SIMD-ONLY0: if.end5118: +// 
SIMD-ONLY0-NEXT: [[TMP3798:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3799:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5119:%.*]] = icmp sgt i64 [[TMP3798]], [[TMP3799]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5119]], label [[IF_THEN5121:%.*]], label [[IF_END5122:%.*]] +// SIMD-ONLY0: if.then5121: +// SIMD-ONLY0-NEXT: [[TMP3800:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3800]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5122]] +// SIMD-ONLY0: if.end5122: +// SIMD-ONLY0-NEXT: [[TMP3801:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3801]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3802:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3803:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5123:%.*]] = icmp sgt i64 [[TMP3802]], [[TMP3803]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5123]], label [[IF_THEN5125:%.*]], label [[IF_END5126:%.*]] +// SIMD-ONLY0: if.then5125: +// SIMD-ONLY0-NEXT: [[TMP3804:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3804]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5126]] +// SIMD-ONLY0: if.end5126: +// SIMD-ONLY0-NEXT: [[TMP3805:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3805]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3806:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3807:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5127:%.*]] = icmp slt i64 [[TMP3806]], [[TMP3807]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5127]], label [[IF_THEN5129:%.*]], label [[IF_END5130:%.*]] +// SIMD-ONLY0: if.then5129: +// SIMD-ONLY0-NEXT: [[TMP3808:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3808]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5130]] +// SIMD-ONLY0: if.end5130: +// SIMD-ONLY0-NEXT: [[TMP3809:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: 
store i64 [[TMP3809]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3810:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3811:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5131:%.*]] = icmp slt i64 [[TMP3810]], [[TMP3811]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5131]], label [[IF_THEN5133:%.*]], label [[IF_END5134:%.*]] +// SIMD-ONLY0: if.then5133: +// SIMD-ONLY0-NEXT: [[TMP3812:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3812]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5134]] +// SIMD-ONLY0: if.end5134: +// SIMD-ONLY0-NEXT: [[TMP3813:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3813]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3814:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3815:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5135:%.*]] = icmp eq i64 [[TMP3814]], [[TMP3815]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5135]], label [[IF_THEN5137:%.*]], label [[IF_END5138:%.*]] +// SIMD-ONLY0: if.then5137: +// SIMD-ONLY0-NEXT: [[TMP3816:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3816]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5138]] +// SIMD-ONLY0: if.end5138: +// SIMD-ONLY0-NEXT: [[TMP3817:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3817]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3818:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3819:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5139:%.*]] = icmp eq i64 [[TMP3818]], [[TMP3819]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5139]], label [[IF_THEN5141:%.*]], label [[IF_END5142:%.*]] +// SIMD-ONLY0: if.then5141: +// SIMD-ONLY0-NEXT: [[TMP3820:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3820]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5142]] +// SIMD-ONLY0: if.end5142: +// SIMD-ONLY0-NEXT: [[TMP3821:%.*]] = load i64, 
ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3821]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3822:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3823:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5143:%.*]] = icmp eq i64 [[TMP3822]], [[TMP3823]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5143]], label [[IF_THEN5145:%.*]], label [[IF_ELSE5146:%.*]] +// SIMD-ONLY0: if.then5145: +// SIMD-ONLY0-NEXT: [[TMP3824:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3824]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5147:%.*]] +// SIMD-ONLY0: if.else5146: +// SIMD-ONLY0-NEXT: [[TMP3825:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3825]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5147]] +// SIMD-ONLY0: if.end5147: +// SIMD-ONLY0-NEXT: [[TMP3826:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3827:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5148:%.*]] = icmp eq i64 [[TMP3826]], [[TMP3827]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5148]], label [[IF_THEN5150:%.*]], label [[IF_ELSE5151:%.*]] +// SIMD-ONLY0: if.then5150: +// SIMD-ONLY0-NEXT: [[TMP3828:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3828]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5152:%.*]] +// SIMD-ONLY0: if.else5151: +// SIMD-ONLY0-NEXT: [[TMP3829:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3829]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5152]] +// SIMD-ONLY0: if.end5152: +// SIMD-ONLY0-NEXT: [[TMP3830:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3831:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5153:%.*]] = icmp eq i64 [[TMP3830]], [[TMP3831]] +// SIMD-ONLY0-NEXT: [[CONV5154:%.*]] = zext i1 [[CMP5153]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5155:%.*]] = sext i32 [[CONV5154]] to i64 +// SIMD-ONLY0-NEXT: store i64 
[[CONV5155]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3832:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5156:%.*]] = icmp ne i64 [[TMP3832]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5156]], label [[IF_THEN5157:%.*]], label [[IF_END5158:%.*]] +// SIMD-ONLY0: if.then5157: +// SIMD-ONLY0-NEXT: [[TMP3833:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3833]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5158]] +// SIMD-ONLY0: if.end5158: +// SIMD-ONLY0-NEXT: [[TMP3834:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3835:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5159:%.*]] = icmp eq i64 [[TMP3834]], [[TMP3835]] +// SIMD-ONLY0-NEXT: [[CONV5160:%.*]] = zext i1 [[CMP5159]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5161:%.*]] = sext i32 [[CONV5160]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5161]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3836:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5162:%.*]] = icmp ne i64 [[TMP3836]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5162]], label [[IF_THEN5163:%.*]], label [[IF_END5164:%.*]] +// SIMD-ONLY0: if.then5163: +// SIMD-ONLY0-NEXT: [[TMP3837:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3837]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5164]] +// SIMD-ONLY0: if.end5164: +// SIMD-ONLY0-NEXT: [[TMP3838:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3839:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5165:%.*]] = icmp eq i64 [[TMP3838]], [[TMP3839]] +// SIMD-ONLY0-NEXT: [[CONV5166:%.*]] = zext i1 [[CMP5165]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5167:%.*]] = sext i32 [[CONV5166]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5167]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3840:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5168:%.*]] = icmp ne i64 [[TMP3840]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5168]], 
label [[IF_THEN5169:%.*]], label [[IF_ELSE5170:%.*]] +// SIMD-ONLY0: if.then5169: +// SIMD-ONLY0-NEXT: [[TMP3841:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3841]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5171:%.*]] +// SIMD-ONLY0: if.else5170: +// SIMD-ONLY0-NEXT: [[TMP3842:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3842]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5171]] +// SIMD-ONLY0: if.end5171: +// SIMD-ONLY0-NEXT: [[TMP3843:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3844:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5172:%.*]] = icmp eq i64 [[TMP3843]], [[TMP3844]] +// SIMD-ONLY0-NEXT: [[CONV5173:%.*]] = zext i1 [[CMP5172]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5174:%.*]] = sext i32 [[CONV5173]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5174]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3845:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5175:%.*]] = icmp ne i64 [[TMP3845]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5175]], label [[IF_THEN5176:%.*]], label [[IF_ELSE5177:%.*]] +// SIMD-ONLY0: if.then5176: +// SIMD-ONLY0-NEXT: [[TMP3846:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3846]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5178:%.*]] +// SIMD-ONLY0: if.else5177: +// SIMD-ONLY0-NEXT: [[TMP3847:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3847]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5178]] +// SIMD-ONLY0: if.end5178: +// SIMD-ONLY0-NEXT: [[TMP3848:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3848]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3849:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3850:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5179:%.*]] = icmp sgt i64 [[TMP3849]], [[TMP3850]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5179]], 
label [[IF_THEN5181:%.*]], label [[IF_END5182:%.*]] +// SIMD-ONLY0: if.then5181: +// SIMD-ONLY0-NEXT: [[TMP3851:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3851]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5182]] +// SIMD-ONLY0: if.end5182: +// SIMD-ONLY0-NEXT: [[TMP3852:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3852]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3853:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3854:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5183:%.*]] = icmp sgt i64 [[TMP3853]], [[TMP3854]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5183]], label [[IF_THEN5185:%.*]], label [[IF_END5186:%.*]] +// SIMD-ONLY0: if.then5185: +// SIMD-ONLY0-NEXT: [[TMP3855:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3855]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5186]] +// SIMD-ONLY0: if.end5186: +// SIMD-ONLY0-NEXT: [[TMP3856:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3856]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3857:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3858:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5187:%.*]] = icmp slt i64 [[TMP3857]], [[TMP3858]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5187]], label [[IF_THEN5189:%.*]], label [[IF_END5190:%.*]] +// SIMD-ONLY0: if.then5189: +// SIMD-ONLY0-NEXT: [[TMP3859:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3859]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5190]] +// SIMD-ONLY0: if.end5190: +// SIMD-ONLY0-NEXT: [[TMP3860:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3860]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3861:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3862:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5191:%.*]] = icmp slt i64 [[TMP3861]], 
[[TMP3862]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5191]], label [[IF_THEN5193:%.*]], label [[IF_END5194:%.*]] +// SIMD-ONLY0: if.then5193: +// SIMD-ONLY0-NEXT: [[TMP3863:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3863]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5194]] +// SIMD-ONLY0: if.end5194: +// SIMD-ONLY0-NEXT: [[TMP3864:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3864]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3865:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3866:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5195:%.*]] = icmp eq i64 [[TMP3865]], [[TMP3866]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5195]], label [[IF_THEN5197:%.*]], label [[IF_END5198:%.*]] +// SIMD-ONLY0: if.then5197: +// SIMD-ONLY0-NEXT: [[TMP3867:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3867]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5198]] +// SIMD-ONLY0: if.end5198: +// SIMD-ONLY0-NEXT: [[TMP3868:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3868]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3869:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3870:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5199:%.*]] = icmp eq i64 [[TMP3869]], [[TMP3870]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5199]], label [[IF_THEN5201:%.*]], label [[IF_END5202:%.*]] +// SIMD-ONLY0: if.then5201: +// SIMD-ONLY0-NEXT: [[TMP3871:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3871]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5202]] +// SIMD-ONLY0: if.end5202: +// SIMD-ONLY0-NEXT: [[TMP3872:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3873:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5203:%.*]] = icmp sgt i64 [[TMP3872]], [[TMP3873]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5203]], label [[IF_THEN5205:%.*]], label 
[[IF_END5206:%.*]] +// SIMD-ONLY0: if.then5205: +// SIMD-ONLY0-NEXT: [[TMP3874:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3874]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5206]] +// SIMD-ONLY0: if.end5206: +// SIMD-ONLY0-NEXT: [[TMP3875:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3875]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3876:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3877:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5207:%.*]] = icmp sgt i64 [[TMP3876]], [[TMP3877]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5207]], label [[IF_THEN5209:%.*]], label [[IF_END5210:%.*]] +// SIMD-ONLY0: if.then5209: +// SIMD-ONLY0-NEXT: [[TMP3878:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3878]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5210]] +// SIMD-ONLY0: if.end5210: +// SIMD-ONLY0-NEXT: [[TMP3879:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3879]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3880:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3881:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5211:%.*]] = icmp slt i64 [[TMP3880]], [[TMP3881]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5211]], label [[IF_THEN5213:%.*]], label [[IF_END5214:%.*]] +// SIMD-ONLY0: if.then5213: +// SIMD-ONLY0-NEXT: [[TMP3882:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3882]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5214]] +// SIMD-ONLY0: if.end5214: +// SIMD-ONLY0-NEXT: [[TMP3883:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3883]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3884:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3885:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5215:%.*]] = icmp slt i64 [[TMP3884]], [[TMP3885]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP5215]], label [[IF_THEN5217:%.*]], label [[IF_END5218:%.*]] +// SIMD-ONLY0: if.then5217: +// SIMD-ONLY0-NEXT: [[TMP3886:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3886]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5218]] +// SIMD-ONLY0: if.end5218: +// SIMD-ONLY0-NEXT: [[TMP3887:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3887]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3888:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3889:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5219:%.*]] = icmp eq i64 [[TMP3888]], [[TMP3889]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5219]], label [[IF_THEN5221:%.*]], label [[IF_END5222:%.*]] +// SIMD-ONLY0: if.then5221: +// SIMD-ONLY0-NEXT: [[TMP3890:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3890]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5222]] +// SIMD-ONLY0: if.end5222: +// SIMD-ONLY0-NEXT: [[TMP3891:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3891]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3892:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3893:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5223:%.*]] = icmp eq i64 [[TMP3892]], [[TMP3893]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5223]], label [[IF_THEN5225:%.*]], label [[IF_END5226:%.*]] +// SIMD-ONLY0: if.then5225: +// SIMD-ONLY0-NEXT: [[TMP3894:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3894]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5226]] +// SIMD-ONLY0: if.end5226: +// SIMD-ONLY0-NEXT: [[TMP3895:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3895]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3896:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3897:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5227:%.*]] = icmp eq i64 
[[TMP3896]], [[TMP3897]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5227]], label [[IF_THEN5229:%.*]], label [[IF_ELSE5230:%.*]] +// SIMD-ONLY0: if.then5229: +// SIMD-ONLY0-NEXT: [[TMP3898:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3898]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5231:%.*]] +// SIMD-ONLY0: if.else5230: +// SIMD-ONLY0-NEXT: [[TMP3899:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3899]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5231]] +// SIMD-ONLY0: if.end5231: +// SIMD-ONLY0-NEXT: [[TMP3900:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3901:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5232:%.*]] = icmp eq i64 [[TMP3900]], [[TMP3901]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5232]], label [[IF_THEN5234:%.*]], label [[IF_ELSE5235:%.*]] +// SIMD-ONLY0: if.then5234: +// SIMD-ONLY0-NEXT: [[TMP3902:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3902]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5236:%.*]] +// SIMD-ONLY0: if.else5235: +// SIMD-ONLY0-NEXT: [[TMP3903:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3903]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5236]] +// SIMD-ONLY0: if.end5236: +// SIMD-ONLY0-NEXT: [[TMP3904:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3905:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5237:%.*]] = icmp eq i64 [[TMP3904]], [[TMP3905]] +// SIMD-ONLY0-NEXT: [[CONV5238:%.*]] = zext i1 [[CMP5237]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5239:%.*]] = sext i32 [[CONV5238]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5239]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3906:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5240:%.*]] = icmp ne i64 [[TMP3906]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5240]], label [[IF_THEN5241:%.*]], label [[IF_END5242:%.*]] +// SIMD-ONLY0: 
if.then5241: +// SIMD-ONLY0-NEXT: [[TMP3907:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3907]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5242]] +// SIMD-ONLY0: if.end5242: +// SIMD-ONLY0-NEXT: [[TMP3908:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3909:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5243:%.*]] = icmp eq i64 [[TMP3908]], [[TMP3909]] +// SIMD-ONLY0-NEXT: [[CONV5244:%.*]] = zext i1 [[CMP5243]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5245:%.*]] = sext i32 [[CONV5244]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5245]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3910:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5246:%.*]] = icmp ne i64 [[TMP3910]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5246]], label [[IF_THEN5247:%.*]], label [[IF_END5248:%.*]] +// SIMD-ONLY0: if.then5247: +// SIMD-ONLY0-NEXT: [[TMP3911:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3911]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5248]] +// SIMD-ONLY0: if.end5248: +// SIMD-ONLY0-NEXT: [[TMP3912:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3913:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5249:%.*]] = icmp eq i64 [[TMP3912]], [[TMP3913]] +// SIMD-ONLY0-NEXT: [[CONV5250:%.*]] = zext i1 [[CMP5249]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5251:%.*]] = sext i32 [[CONV5250]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5251]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3914:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5252:%.*]] = icmp ne i64 [[TMP3914]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5252]], label [[IF_THEN5253:%.*]], label [[IF_ELSE5254:%.*]] +// SIMD-ONLY0: if.then5253: +// SIMD-ONLY0-NEXT: [[TMP3915:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3915]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5255:%.*]] +// SIMD-ONLY0: 
if.else5254: +// SIMD-ONLY0-NEXT: [[TMP3916:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3916]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5255]] +// SIMD-ONLY0: if.end5255: +// SIMD-ONLY0-NEXT: [[TMP3917:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3918:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5256:%.*]] = icmp eq i64 [[TMP3917]], [[TMP3918]] +// SIMD-ONLY0-NEXT: [[CONV5257:%.*]] = zext i1 [[CMP5256]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5258:%.*]] = sext i32 [[CONV5257]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5258]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3919:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5259:%.*]] = icmp ne i64 [[TMP3919]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5259]], label [[IF_THEN5260:%.*]], label [[IF_ELSE5261:%.*]] +// SIMD-ONLY0: if.then5260: +// SIMD-ONLY0-NEXT: [[TMP3920:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3920]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5262:%.*]] +// SIMD-ONLY0: if.else5261: +// SIMD-ONLY0-NEXT: [[TMP3921:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3921]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5262]] +// SIMD-ONLY0: if.end5262: +// SIMD-ONLY0-NEXT: [[TMP3922:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3922]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3923:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3924:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5263:%.*]] = icmp sgt i64 [[TMP3923]], [[TMP3924]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5263]], label [[IF_THEN5265:%.*]], label [[IF_END5266:%.*]] +// SIMD-ONLY0: if.then5265: +// SIMD-ONLY0-NEXT: [[TMP3925:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3925]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5266]] +// SIMD-ONLY0: 
if.end5266: +// SIMD-ONLY0-NEXT: [[TMP3926:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3926]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3927:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3928:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5267:%.*]] = icmp sgt i64 [[TMP3927]], [[TMP3928]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5267]], label [[IF_THEN5269:%.*]], label [[IF_END5270:%.*]] +// SIMD-ONLY0: if.then5269: +// SIMD-ONLY0-NEXT: [[TMP3929:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3929]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5270]] +// SIMD-ONLY0: if.end5270: +// SIMD-ONLY0-NEXT: [[TMP3930:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3930]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3931:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3932:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5271:%.*]] = icmp slt i64 [[TMP3931]], [[TMP3932]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5271]], label [[IF_THEN5273:%.*]], label [[IF_END5274:%.*]] +// SIMD-ONLY0: if.then5273: +// SIMD-ONLY0-NEXT: [[TMP3933:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3933]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5274]] +// SIMD-ONLY0: if.end5274: +// SIMD-ONLY0-NEXT: [[TMP3934:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3934]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3935:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3936:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5275:%.*]] = icmp slt i64 [[TMP3935]], [[TMP3936]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5275]], label [[IF_THEN5277:%.*]], label [[IF_END5278:%.*]] +// SIMD-ONLY0: if.then5277: +// SIMD-ONLY0-NEXT: [[TMP3937:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3937]], ptr [[LLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5278]] +// SIMD-ONLY0: if.end5278: +// SIMD-ONLY0-NEXT: [[TMP3938:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3938]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3939:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3940:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5279:%.*]] = icmp eq i64 [[TMP3939]], [[TMP3940]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5279]], label [[IF_THEN5281:%.*]], label [[IF_END5282:%.*]] +// SIMD-ONLY0: if.then5281: +// SIMD-ONLY0-NEXT: [[TMP3941:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3941]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5282]] +// SIMD-ONLY0: if.end5282: +// SIMD-ONLY0-NEXT: [[TMP3942:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3942]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3943:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3944:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5283:%.*]] = icmp eq i64 [[TMP3943]], [[TMP3944]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5283]], label [[IF_THEN5285:%.*]], label [[IF_END5286:%.*]] +// SIMD-ONLY0: if.then5285: +// SIMD-ONLY0-NEXT: [[TMP3945:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3945]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5286]] +// SIMD-ONLY0: if.end5286: +// SIMD-ONLY0-NEXT: [[TMP3946:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3947:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5287:%.*]] = icmp sgt i64 [[TMP3946]], [[TMP3947]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5287]], label [[IF_THEN5289:%.*]], label [[IF_END5290:%.*]] +// SIMD-ONLY0: if.then5289: +// SIMD-ONLY0-NEXT: [[TMP3948:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3948]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5290]] +// SIMD-ONLY0: if.end5290: +// SIMD-ONLY0-NEXT: 
[[TMP3949:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3949]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3950:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3951:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5291:%.*]] = icmp sgt i64 [[TMP3950]], [[TMP3951]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5291]], label [[IF_THEN5293:%.*]], label [[IF_END5294:%.*]] +// SIMD-ONLY0: if.then5293: +// SIMD-ONLY0-NEXT: [[TMP3952:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3952]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5294]] +// SIMD-ONLY0: if.end5294: +// SIMD-ONLY0-NEXT: [[TMP3953:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3953]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3954:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3955:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5295:%.*]] = icmp slt i64 [[TMP3954]], [[TMP3955]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5295]], label [[IF_THEN5297:%.*]], label [[IF_END5298:%.*]] +// SIMD-ONLY0: if.then5297: +// SIMD-ONLY0-NEXT: [[TMP3956:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3956]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5298]] +// SIMD-ONLY0: if.end5298: +// SIMD-ONLY0-NEXT: [[TMP3957:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3957]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3958:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3959:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5299:%.*]] = icmp slt i64 [[TMP3958]], [[TMP3959]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5299]], label [[IF_THEN5301:%.*]], label [[IF_END5302:%.*]] +// SIMD-ONLY0: if.then5301: +// SIMD-ONLY0-NEXT: [[TMP3960:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3960]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5302]] +// 
SIMD-ONLY0: if.end5302: +// SIMD-ONLY0-NEXT: [[TMP3961:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3961]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3962:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3963:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5303:%.*]] = icmp eq i64 [[TMP3962]], [[TMP3963]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5303]], label [[IF_THEN5305:%.*]], label [[IF_END5306:%.*]] +// SIMD-ONLY0: if.then5305: +// SIMD-ONLY0-NEXT: [[TMP3964:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3964]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5306]] +// SIMD-ONLY0: if.end5306: +// SIMD-ONLY0-NEXT: [[TMP3965:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3965]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3966:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3967:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5307:%.*]] = icmp eq i64 [[TMP3966]], [[TMP3967]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5307]], label [[IF_THEN5309:%.*]], label [[IF_END5310:%.*]] +// SIMD-ONLY0: if.then5309: +// SIMD-ONLY0-NEXT: [[TMP3968:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3968]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5310]] +// SIMD-ONLY0: if.end5310: +// SIMD-ONLY0-NEXT: [[TMP3969:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3969]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3970:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3971:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5311:%.*]] = icmp eq i64 [[TMP3970]], [[TMP3971]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5311]], label [[IF_THEN5313:%.*]], label [[IF_ELSE5314:%.*]] +// SIMD-ONLY0: if.then5313: +// SIMD-ONLY0-NEXT: [[TMP3972:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3972]], ptr [[LLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5315:%.*]] +// SIMD-ONLY0: if.else5314: +// SIMD-ONLY0-NEXT: [[TMP3973:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3973]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5315]] +// SIMD-ONLY0: if.end5315: +// SIMD-ONLY0-NEXT: [[TMP3974:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3975:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5316:%.*]] = icmp eq i64 [[TMP3974]], [[TMP3975]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5316]], label [[IF_THEN5318:%.*]], label [[IF_ELSE5319:%.*]] +// SIMD-ONLY0: if.then5318: +// SIMD-ONLY0-NEXT: [[TMP3976:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3976]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5320:%.*]] +// SIMD-ONLY0: if.else5319: +// SIMD-ONLY0-NEXT: [[TMP3977:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3977]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5320]] +// SIMD-ONLY0: if.end5320: +// SIMD-ONLY0-NEXT: [[TMP3978:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3979:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5321:%.*]] = icmp eq i64 [[TMP3978]], [[TMP3979]] +// SIMD-ONLY0-NEXT: [[CONV5322:%.*]] = zext i1 [[CMP5321]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5323:%.*]] = sext i32 [[CONV5322]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5323]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3980:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5324:%.*]] = icmp ne i64 [[TMP3980]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5324]], label [[IF_THEN5325:%.*]], label [[IF_END5326:%.*]] +// SIMD-ONLY0: if.then5325: +// SIMD-ONLY0-NEXT: [[TMP3981:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3981]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5326]] +// SIMD-ONLY0: if.end5326: +// SIMD-ONLY0-NEXT: [[TMP3982:%.*]] = load i64, ptr [[LLE]], align 8 
+// SIMD-ONLY0-NEXT: [[TMP3983:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5327:%.*]] = icmp eq i64 [[TMP3982]], [[TMP3983]] +// SIMD-ONLY0-NEXT: [[CONV5328:%.*]] = zext i1 [[CMP5327]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5329:%.*]] = sext i32 [[CONV5328]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5329]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3984:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5330:%.*]] = icmp ne i64 [[TMP3984]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5330]], label [[IF_THEN5331:%.*]], label [[IF_END5332:%.*]] +// SIMD-ONLY0: if.then5331: +// SIMD-ONLY0-NEXT: [[TMP3985:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3985]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5332]] +// SIMD-ONLY0: if.end5332: +// SIMD-ONLY0-NEXT: [[TMP3986:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3987:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5333:%.*]] = icmp eq i64 [[TMP3986]], [[TMP3987]] +// SIMD-ONLY0-NEXT: [[CONV5334:%.*]] = zext i1 [[CMP5333]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5335:%.*]] = sext i32 [[CONV5334]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5335]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3988:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5336:%.*]] = icmp ne i64 [[TMP3988]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5336]], label [[IF_THEN5337:%.*]], label [[IF_ELSE5338:%.*]] +// SIMD-ONLY0: if.then5337: +// SIMD-ONLY0-NEXT: [[TMP3989:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3989]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5339:%.*]] +// SIMD-ONLY0: if.else5338: +// SIMD-ONLY0-NEXT: [[TMP3990:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3990]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5339]] +// SIMD-ONLY0: if.end5339: +// SIMD-ONLY0-NEXT: [[TMP3991:%.*]] = load i64, ptr [[LLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP3992:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5340:%.*]] = icmp eq i64 [[TMP3991]], [[TMP3992]] +// SIMD-ONLY0-NEXT: [[CONV5341:%.*]] = zext i1 [[CMP5340]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5342:%.*]] = sext i32 [[CONV5341]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5342]], ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3993:%.*]] = load i64, ptr [[LLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5343:%.*]] = icmp ne i64 [[TMP3993]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5343]], label [[IF_THEN5344:%.*]], label [[IF_ELSE5345:%.*]] +// SIMD-ONLY0: if.then5344: +// SIMD-ONLY0-NEXT: [[TMP3994:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3994]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5346:%.*]] +// SIMD-ONLY0: if.else5345: +// SIMD-ONLY0-NEXT: [[TMP3995:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3995]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5346]] +// SIMD-ONLY0: if.end5346: +// SIMD-ONLY0-NEXT: [[TMP3996:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3996]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3997:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP3998:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5347:%.*]] = icmp ugt i64 [[TMP3997]], [[TMP3998]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5347]], label [[IF_THEN5349:%.*]], label [[IF_END5350:%.*]] +// SIMD-ONLY0: if.then5349: +// SIMD-ONLY0-NEXT: [[TMP3999:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP3999]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5350]] +// SIMD-ONLY0: if.end5350: +// SIMD-ONLY0-NEXT: [[TMP4000:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4000]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4001:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4002:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP5351:%.*]] = icmp ugt i64 [[TMP4001]], [[TMP4002]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5351]], label [[IF_THEN5353:%.*]], label [[IF_END5354:%.*]] +// SIMD-ONLY0: if.then5353: +// SIMD-ONLY0-NEXT: [[TMP4003:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4003]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5354]] +// SIMD-ONLY0: if.end5354: +// SIMD-ONLY0-NEXT: [[TMP4004:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4004]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4005:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4006:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5355:%.*]] = icmp ult i64 [[TMP4005]], [[TMP4006]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5355]], label [[IF_THEN5357:%.*]], label [[IF_END5358:%.*]] +// SIMD-ONLY0: if.then5357: +// SIMD-ONLY0-NEXT: [[TMP4007:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4007]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5358]] +// SIMD-ONLY0: if.end5358: +// SIMD-ONLY0-NEXT: [[TMP4008:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4008]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4009:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4010:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5359:%.*]] = icmp ult i64 [[TMP4009]], [[TMP4010]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5359]], label [[IF_THEN5361:%.*]], label [[IF_END5362:%.*]] +// SIMD-ONLY0: if.then5361: +// SIMD-ONLY0-NEXT: [[TMP4011:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4011]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5362]] +// SIMD-ONLY0: if.end5362: +// SIMD-ONLY0-NEXT: [[TMP4012:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4012]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4013:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP4014:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5363:%.*]] = icmp eq i64 [[TMP4013]], [[TMP4014]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5363]], label [[IF_THEN5365:%.*]], label [[IF_END5366:%.*]] +// SIMD-ONLY0: if.then5365: +// SIMD-ONLY0-NEXT: [[TMP4015:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4015]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5366]] +// SIMD-ONLY0: if.end5366: +// SIMD-ONLY0-NEXT: [[TMP4016:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4016]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4017:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4018:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5367:%.*]] = icmp eq i64 [[TMP4017]], [[TMP4018]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5367]], label [[IF_THEN5369:%.*]], label [[IF_END5370:%.*]] +// SIMD-ONLY0: if.then5369: +// SIMD-ONLY0-NEXT: [[TMP4019:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4019]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5370]] +// SIMD-ONLY0: if.end5370: +// SIMD-ONLY0-NEXT: [[TMP4020:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4021:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5371:%.*]] = icmp ugt i64 [[TMP4020]], [[TMP4021]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5371]], label [[IF_THEN5373:%.*]], label [[IF_END5374:%.*]] +// SIMD-ONLY0: if.then5373: +// SIMD-ONLY0-NEXT: [[TMP4022:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4022]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5374]] +// SIMD-ONLY0: if.end5374: +// SIMD-ONLY0-NEXT: [[TMP4023:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4023]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4024:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4025:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP5375:%.*]] = icmp ugt i64 [[TMP4024]], [[TMP4025]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5375]], label [[IF_THEN5377:%.*]], label [[IF_END5378:%.*]] +// SIMD-ONLY0: if.then5377: +// SIMD-ONLY0-NEXT: [[TMP4026:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4026]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5378]] +// SIMD-ONLY0: if.end5378: +// SIMD-ONLY0-NEXT: [[TMP4027:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4027]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4028:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4029:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5379:%.*]] = icmp ult i64 [[TMP4028]], [[TMP4029]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5379]], label [[IF_THEN5381:%.*]], label [[IF_END5382:%.*]] +// SIMD-ONLY0: if.then5381: +// SIMD-ONLY0-NEXT: [[TMP4030:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4030]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5382]] +// SIMD-ONLY0: if.end5382: +// SIMD-ONLY0-NEXT: [[TMP4031:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4031]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4032:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4033:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5383:%.*]] = icmp ult i64 [[TMP4032]], [[TMP4033]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5383]], label [[IF_THEN5385:%.*]], label [[IF_END5386:%.*]] +// SIMD-ONLY0: if.then5385: +// SIMD-ONLY0-NEXT: [[TMP4034:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4034]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5386]] +// SIMD-ONLY0: if.end5386: +// SIMD-ONLY0-NEXT: [[TMP4035:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4035]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4036:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP4037:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5387:%.*]] = icmp eq i64 [[TMP4036]], [[TMP4037]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5387]], label [[IF_THEN5389:%.*]], label [[IF_END5390:%.*]] +// SIMD-ONLY0: if.then5389: +// SIMD-ONLY0-NEXT: [[TMP4038:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4038]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5390]] +// SIMD-ONLY0: if.end5390: +// SIMD-ONLY0-NEXT: [[TMP4039:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4039]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4040:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4041:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5391:%.*]] = icmp eq i64 [[TMP4040]], [[TMP4041]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5391]], label [[IF_THEN5393:%.*]], label [[IF_END5394:%.*]] +// SIMD-ONLY0: if.then5393: +// SIMD-ONLY0-NEXT: [[TMP4042:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4042]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5394]] +// SIMD-ONLY0: if.end5394: +// SIMD-ONLY0-NEXT: [[TMP4043:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4043]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4044:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4045:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5395:%.*]] = icmp eq i64 [[TMP4044]], [[TMP4045]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5395]], label [[IF_THEN5397:%.*]], label [[IF_ELSE5398:%.*]] +// SIMD-ONLY0: if.then5397: +// SIMD-ONLY0-NEXT: [[TMP4046:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4046]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5399:%.*]] +// SIMD-ONLY0: if.else5398: +// SIMD-ONLY0-NEXT: [[TMP4047:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4047]], ptr [[ULLV]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5399]] +// SIMD-ONLY0: if.end5399: +// SIMD-ONLY0-NEXT: [[TMP4048:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4049:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5400:%.*]] = icmp eq i64 [[TMP4048]], [[TMP4049]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5400]], label [[IF_THEN5402:%.*]], label [[IF_ELSE5403:%.*]] +// SIMD-ONLY0: if.then5402: +// SIMD-ONLY0-NEXT: [[TMP4050:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4050]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5404:%.*]] +// SIMD-ONLY0: if.else5403: +// SIMD-ONLY0-NEXT: [[TMP4051:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4051]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5404]] +// SIMD-ONLY0: if.end5404: +// SIMD-ONLY0-NEXT: [[TMP4052:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4053:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5405:%.*]] = icmp eq i64 [[TMP4052]], [[TMP4053]] +// SIMD-ONLY0-NEXT: [[CONV5406:%.*]] = zext i1 [[CMP5405]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5407:%.*]] = sext i32 [[CONV5406]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5407]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4054:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5408:%.*]] = icmp ne i64 [[TMP4054]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5408]], label [[IF_THEN5409:%.*]], label [[IF_END5410:%.*]] +// SIMD-ONLY0: if.then5409: +// SIMD-ONLY0-NEXT: [[TMP4055:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4055]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5410]] +// SIMD-ONLY0: if.end5410: +// SIMD-ONLY0-NEXT: [[TMP4056:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4057:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5411:%.*]] = icmp eq i64 [[TMP4056]], [[TMP4057]] +// SIMD-ONLY0-NEXT: [[CONV5412:%.*]] = zext i1 
[[CMP5411]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5413:%.*]] = sext i32 [[CONV5412]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5413]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4058:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5414:%.*]] = icmp ne i64 [[TMP4058]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5414]], label [[IF_THEN5415:%.*]], label [[IF_END5416:%.*]] +// SIMD-ONLY0: if.then5415: +// SIMD-ONLY0-NEXT: [[TMP4059:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4059]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5416]] +// SIMD-ONLY0: if.end5416: +// SIMD-ONLY0-NEXT: [[TMP4060:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4061:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5417:%.*]] = icmp eq i64 [[TMP4060]], [[TMP4061]] +// SIMD-ONLY0-NEXT: [[CONV5418:%.*]] = zext i1 [[CMP5417]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5419:%.*]] = sext i32 [[CONV5418]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5419]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4062:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5420:%.*]] = icmp ne i64 [[TMP4062]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5420]], label [[IF_THEN5421:%.*]], label [[IF_ELSE5422:%.*]] +// SIMD-ONLY0: if.then5421: +// SIMD-ONLY0-NEXT: [[TMP4063:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4063]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5423:%.*]] +// SIMD-ONLY0: if.else5422: +// SIMD-ONLY0-NEXT: [[TMP4064:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4064]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5423]] +// SIMD-ONLY0: if.end5423: +// SIMD-ONLY0-NEXT: [[TMP4065:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4066:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5424:%.*]] = icmp eq i64 [[TMP4065]], [[TMP4066]] +// SIMD-ONLY0-NEXT: [[CONV5425:%.*]] = 
zext i1 [[CMP5424]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5426:%.*]] = sext i32 [[CONV5425]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5426]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4067:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5427:%.*]] = icmp ne i64 [[TMP4067]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5427]], label [[IF_THEN5428:%.*]], label [[IF_ELSE5429:%.*]] +// SIMD-ONLY0: if.then5428: +// SIMD-ONLY0-NEXT: [[TMP4068:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4068]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5430:%.*]] +// SIMD-ONLY0: if.else5429: +// SIMD-ONLY0-NEXT: [[TMP4069:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4069]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5430]] +// SIMD-ONLY0: if.end5430: +// SIMD-ONLY0-NEXT: [[TMP4070:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4070]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4071:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4072:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5431:%.*]] = icmp ugt i64 [[TMP4071]], [[TMP4072]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5431]], label [[IF_THEN5433:%.*]], label [[IF_END5434:%.*]] +// SIMD-ONLY0: if.then5433: +// SIMD-ONLY0-NEXT: [[TMP4073:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4073]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5434]] +// SIMD-ONLY0: if.end5434: +// SIMD-ONLY0-NEXT: [[TMP4074:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4074]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4075:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4076:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5435:%.*]] = icmp ugt i64 [[TMP4075]], [[TMP4076]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5435]], label [[IF_THEN5437:%.*]], label [[IF_END5438:%.*]] +// 
SIMD-ONLY0: if.then5437: +// SIMD-ONLY0-NEXT: [[TMP4077:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4077]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5438]] +// SIMD-ONLY0: if.end5438: +// SIMD-ONLY0-NEXT: [[TMP4078:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4078]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4079:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4080:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5439:%.*]] = icmp ult i64 [[TMP4079]], [[TMP4080]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5439]], label [[IF_THEN5441:%.*]], label [[IF_END5442:%.*]] +// SIMD-ONLY0: if.then5441: +// SIMD-ONLY0-NEXT: [[TMP4081:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4081]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5442]] +// SIMD-ONLY0: if.end5442: +// SIMD-ONLY0-NEXT: [[TMP4082:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4082]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4083:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4084:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5443:%.*]] = icmp ult i64 [[TMP4083]], [[TMP4084]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5443]], label [[IF_THEN5445:%.*]], label [[IF_END5446:%.*]] +// SIMD-ONLY0: if.then5445: +// SIMD-ONLY0-NEXT: [[TMP4085:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4085]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5446]] +// SIMD-ONLY0: if.end5446: +// SIMD-ONLY0-NEXT: [[TMP4086:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4086]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4087:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4088:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5447:%.*]] = icmp eq i64 [[TMP4087]], [[TMP4088]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP5447]], label [[IF_THEN5449:%.*]], label [[IF_END5450:%.*]] +// SIMD-ONLY0: if.then5449: +// SIMD-ONLY0-NEXT: [[TMP4089:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4089]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5450]] +// SIMD-ONLY0: if.end5450: +// SIMD-ONLY0-NEXT: [[TMP4090:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4090]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4091:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4092:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5451:%.*]] = icmp eq i64 [[TMP4091]], [[TMP4092]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5451]], label [[IF_THEN5453:%.*]], label [[IF_END5454:%.*]] +// SIMD-ONLY0: if.then5453: +// SIMD-ONLY0-NEXT: [[TMP4093:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4093]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5454]] +// SIMD-ONLY0: if.end5454: +// SIMD-ONLY0-NEXT: [[TMP4094:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4095:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5455:%.*]] = icmp ugt i64 [[TMP4094]], [[TMP4095]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5455]], label [[IF_THEN5457:%.*]], label [[IF_END5458:%.*]] +// SIMD-ONLY0: if.then5457: +// SIMD-ONLY0-NEXT: [[TMP4096:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4096]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5458]] +// SIMD-ONLY0: if.end5458: +// SIMD-ONLY0-NEXT: [[TMP4097:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4097]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4098:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4099:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5459:%.*]] = icmp ugt i64 [[TMP4098]], [[TMP4099]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5459]], label [[IF_THEN5461:%.*]], label [[IF_END5462:%.*]] +// 
SIMD-ONLY0: if.then5461: +// SIMD-ONLY0-NEXT: [[TMP4100:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4100]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5462]] +// SIMD-ONLY0: if.end5462: +// SIMD-ONLY0-NEXT: [[TMP4101:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4101]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4102:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4103:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5463:%.*]] = icmp ult i64 [[TMP4102]], [[TMP4103]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5463]], label [[IF_THEN5465:%.*]], label [[IF_END5466:%.*]] +// SIMD-ONLY0: if.then5465: +// SIMD-ONLY0-NEXT: [[TMP4104:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4104]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5466]] +// SIMD-ONLY0: if.end5466: +// SIMD-ONLY0-NEXT: [[TMP4105:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4105]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4106:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4107:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5467:%.*]] = icmp ult i64 [[TMP4106]], [[TMP4107]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5467]], label [[IF_THEN5469:%.*]], label [[IF_END5470:%.*]] +// SIMD-ONLY0: if.then5469: +// SIMD-ONLY0-NEXT: [[TMP4108:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4108]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5470]] +// SIMD-ONLY0: if.end5470: +// SIMD-ONLY0-NEXT: [[TMP4109:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4109]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4110:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4111:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5471:%.*]] = icmp eq i64 [[TMP4110]], [[TMP4111]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP5471]], label [[IF_THEN5473:%.*]], label [[IF_END5474:%.*]] +// SIMD-ONLY0: if.then5473: +// SIMD-ONLY0-NEXT: [[TMP4112:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4112]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5474]] +// SIMD-ONLY0: if.end5474: +// SIMD-ONLY0-NEXT: [[TMP4113:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4113]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4114:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4115:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5475:%.*]] = icmp eq i64 [[TMP4114]], [[TMP4115]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5475]], label [[IF_THEN5477:%.*]], label [[IF_END5478:%.*]] +// SIMD-ONLY0: if.then5477: +// SIMD-ONLY0-NEXT: [[TMP4116:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4116]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5478]] +// SIMD-ONLY0: if.end5478: +// SIMD-ONLY0-NEXT: [[TMP4117:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4117]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4118:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4119:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5479:%.*]] = icmp eq i64 [[TMP4118]], [[TMP4119]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5479]], label [[IF_THEN5481:%.*]], label [[IF_ELSE5482:%.*]] +// SIMD-ONLY0: if.then5481: +// SIMD-ONLY0-NEXT: [[TMP4120:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4120]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5483:%.*]] +// SIMD-ONLY0: if.else5482: +// SIMD-ONLY0-NEXT: [[TMP4121:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4121]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5483]] +// SIMD-ONLY0: if.end5483: +// SIMD-ONLY0-NEXT: [[TMP4122:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4123:%.*]] 
= load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5484:%.*]] = icmp eq i64 [[TMP4122]], [[TMP4123]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5484]], label [[IF_THEN5486:%.*]], label [[IF_ELSE5487:%.*]] +// SIMD-ONLY0: if.then5486: +// SIMD-ONLY0-NEXT: [[TMP4124:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4124]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5488:%.*]] +// SIMD-ONLY0: if.else5487: +// SIMD-ONLY0-NEXT: [[TMP4125:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4125]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5488]] +// SIMD-ONLY0: if.end5488: +// SIMD-ONLY0-NEXT: [[TMP4126:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4127:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5489:%.*]] = icmp eq i64 [[TMP4126]], [[TMP4127]] +// SIMD-ONLY0-NEXT: [[CONV5490:%.*]] = zext i1 [[CMP5489]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5491:%.*]] = sext i32 [[CONV5490]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5491]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4128:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5492:%.*]] = icmp ne i64 [[TMP4128]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5492]], label [[IF_THEN5493:%.*]], label [[IF_END5494:%.*]] +// SIMD-ONLY0: if.then5493: +// SIMD-ONLY0-NEXT: [[TMP4129:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4129]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5494]] +// SIMD-ONLY0: if.end5494: +// SIMD-ONLY0-NEXT: [[TMP4130:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4131:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5495:%.*]] = icmp eq i64 [[TMP4130]], [[TMP4131]] +// SIMD-ONLY0-NEXT: [[CONV5496:%.*]] = zext i1 [[CMP5495]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5497:%.*]] = sext i32 [[CONV5496]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5497]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP4132:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5498:%.*]] = icmp ne i64 [[TMP4132]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5498]], label [[IF_THEN5499:%.*]], label [[IF_END5500:%.*]] +// SIMD-ONLY0: if.then5499: +// SIMD-ONLY0-NEXT: [[TMP4133:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4133]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5500]] +// SIMD-ONLY0: if.end5500: +// SIMD-ONLY0-NEXT: [[TMP4134:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4135:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5501:%.*]] = icmp eq i64 [[TMP4134]], [[TMP4135]] +// SIMD-ONLY0-NEXT: [[CONV5502:%.*]] = zext i1 [[CMP5501]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5503:%.*]] = sext i32 [[CONV5502]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5503]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4136:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5504:%.*]] = icmp ne i64 [[TMP4136]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5504]], label [[IF_THEN5505:%.*]], label [[IF_ELSE5506:%.*]] +// SIMD-ONLY0: if.then5505: +// SIMD-ONLY0-NEXT: [[TMP4137:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4137]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5507:%.*]] +// SIMD-ONLY0: if.else5506: +// SIMD-ONLY0-NEXT: [[TMP4138:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4138]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5507]] +// SIMD-ONLY0: if.end5507: +// SIMD-ONLY0-NEXT: [[TMP4139:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4140:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5508:%.*]] = icmp eq i64 [[TMP4139]], [[TMP4140]] +// SIMD-ONLY0-NEXT: [[CONV5509:%.*]] = zext i1 [[CMP5508]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5510:%.*]] = sext i32 [[CONV5509]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5510]], ptr [[ULLR]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP4141:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5511:%.*]] = icmp ne i64 [[TMP4141]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5511]], label [[IF_THEN5512:%.*]], label [[IF_ELSE5513:%.*]] +// SIMD-ONLY0: if.then5512: +// SIMD-ONLY0-NEXT: [[TMP4142:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4142]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5514:%.*]] +// SIMD-ONLY0: if.else5513: +// SIMD-ONLY0-NEXT: [[TMP4143:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4143]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5514]] +// SIMD-ONLY0: if.end5514: +// SIMD-ONLY0-NEXT: [[TMP4144:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4144]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4145:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4146:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5515:%.*]] = icmp ugt i64 [[TMP4145]], [[TMP4146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5515]], label [[IF_THEN5517:%.*]], label [[IF_END5518:%.*]] +// SIMD-ONLY0: if.then5517: +// SIMD-ONLY0-NEXT: [[TMP4147:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4147]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5518]] +// SIMD-ONLY0: if.end5518: +// SIMD-ONLY0-NEXT: [[TMP4148:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4148]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4149:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4150:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5519:%.*]] = icmp ugt i64 [[TMP4149]], [[TMP4150]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5519]], label [[IF_THEN5521:%.*]], label [[IF_END5522:%.*]] +// SIMD-ONLY0: if.then5521: +// SIMD-ONLY0-NEXT: [[TMP4151:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4151]], ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5522]] +// SIMD-ONLY0: if.end5522: +// SIMD-ONLY0-NEXT: [[TMP4152:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4152]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4153:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4154:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5523:%.*]] = icmp ult i64 [[TMP4153]], [[TMP4154]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5523]], label [[IF_THEN5525:%.*]], label [[IF_END5526:%.*]] +// SIMD-ONLY0: if.then5525: +// SIMD-ONLY0-NEXT: [[TMP4155:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4155]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5526]] +// SIMD-ONLY0: if.end5526: +// SIMD-ONLY0-NEXT: [[TMP4156:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4156]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4157:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4158:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5527:%.*]] = icmp ult i64 [[TMP4157]], [[TMP4158]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5527]], label [[IF_THEN5529:%.*]], label [[IF_END5530:%.*]] +// SIMD-ONLY0: if.then5529: +// SIMD-ONLY0-NEXT: [[TMP4159:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4159]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5530]] +// SIMD-ONLY0: if.end5530: +// SIMD-ONLY0-NEXT: [[TMP4160:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4160]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4161:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4162:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5531:%.*]] = icmp eq i64 [[TMP4161]], [[TMP4162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5531]], label [[IF_THEN5533:%.*]], label [[IF_END5534:%.*]] +// SIMD-ONLY0: if.then5533: +// SIMD-ONLY0-NEXT: [[TMP4163:%.*]] = load i64, ptr [[ULLD]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP4163]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5534]] +// SIMD-ONLY0: if.end5534: +// SIMD-ONLY0-NEXT: [[TMP4164:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4164]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4165:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4166:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5535:%.*]] = icmp eq i64 [[TMP4165]], [[TMP4166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5535]], label [[IF_THEN5537:%.*]], label [[IF_END5538:%.*]] +// SIMD-ONLY0: if.then5537: +// SIMD-ONLY0-NEXT: [[TMP4167:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4167]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5538]] +// SIMD-ONLY0: if.end5538: +// SIMD-ONLY0-NEXT: [[TMP4168:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4169:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5539:%.*]] = icmp ugt i64 [[TMP4168]], [[TMP4169]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5539]], label [[IF_THEN5541:%.*]], label [[IF_END5542:%.*]] +// SIMD-ONLY0: if.then5541: +// SIMD-ONLY0-NEXT: [[TMP4170:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4170]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5542]] +// SIMD-ONLY0: if.end5542: +// SIMD-ONLY0-NEXT: [[TMP4171:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4171]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4172:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4173:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5543:%.*]] = icmp ugt i64 [[TMP4172]], [[TMP4173]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5543]], label [[IF_THEN5545:%.*]], label [[IF_END5546:%.*]] +// SIMD-ONLY0: if.then5545: +// SIMD-ONLY0-NEXT: [[TMP4174:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4174]], ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5546]] +// SIMD-ONLY0: if.end5546: +// SIMD-ONLY0-NEXT: [[TMP4175:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4175]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4176:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4177:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5547:%.*]] = icmp ult i64 [[TMP4176]], [[TMP4177]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5547]], label [[IF_THEN5549:%.*]], label [[IF_END5550:%.*]] +// SIMD-ONLY0: if.then5549: +// SIMD-ONLY0-NEXT: [[TMP4178:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4178]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5550]] +// SIMD-ONLY0: if.end5550: +// SIMD-ONLY0-NEXT: [[TMP4179:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4179]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4180:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4181:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5551:%.*]] = icmp ult i64 [[TMP4180]], [[TMP4181]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5551]], label [[IF_THEN5553:%.*]], label [[IF_END5554:%.*]] +// SIMD-ONLY0: if.then5553: +// SIMD-ONLY0-NEXT: [[TMP4182:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4182]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5554]] +// SIMD-ONLY0: if.end5554: +// SIMD-ONLY0-NEXT: [[TMP4183:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4183]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4184:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4185:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5555:%.*]] = icmp eq i64 [[TMP4184]], [[TMP4185]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5555]], label [[IF_THEN5557:%.*]], label [[IF_END5558:%.*]] +// SIMD-ONLY0: if.then5557: +// SIMD-ONLY0-NEXT: [[TMP4186:%.*]] = load i64, ptr [[ULLD]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP4186]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5558]] +// SIMD-ONLY0: if.end5558: +// SIMD-ONLY0-NEXT: [[TMP4187:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4187]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4188:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4189:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5559:%.*]] = icmp eq i64 [[TMP4188]], [[TMP4189]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5559]], label [[IF_THEN5561:%.*]], label [[IF_END5562:%.*]] +// SIMD-ONLY0: if.then5561: +// SIMD-ONLY0-NEXT: [[TMP4190:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4190]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5562]] +// SIMD-ONLY0: if.end5562: +// SIMD-ONLY0-NEXT: [[TMP4191:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4191]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4192:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4193:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5563:%.*]] = icmp eq i64 [[TMP4192]], [[TMP4193]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5563]], label [[IF_THEN5565:%.*]], label [[IF_ELSE5566:%.*]] +// SIMD-ONLY0: if.then5565: +// SIMD-ONLY0-NEXT: [[TMP4194:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4194]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5567:%.*]] +// SIMD-ONLY0: if.else5566: +// SIMD-ONLY0-NEXT: [[TMP4195:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4195]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5567]] +// SIMD-ONLY0: if.end5567: +// SIMD-ONLY0-NEXT: [[TMP4196:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4197:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5568:%.*]] = icmp eq i64 [[TMP4196]], [[TMP4197]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5568]], label 
[[IF_THEN5570:%.*]], label [[IF_ELSE5571:%.*]] +// SIMD-ONLY0: if.then5570: +// SIMD-ONLY0-NEXT: [[TMP4198:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4198]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5572:%.*]] +// SIMD-ONLY0: if.else5571: +// SIMD-ONLY0-NEXT: [[TMP4199:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4199]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5572]] +// SIMD-ONLY0: if.end5572: +// SIMD-ONLY0-NEXT: [[TMP4200:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4201:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5573:%.*]] = icmp eq i64 [[TMP4200]], [[TMP4201]] +// SIMD-ONLY0-NEXT: [[CONV5574:%.*]] = zext i1 [[CMP5573]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5575:%.*]] = sext i32 [[CONV5574]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5575]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4202:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5576:%.*]] = icmp ne i64 [[TMP4202]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5576]], label [[IF_THEN5577:%.*]], label [[IF_END5578:%.*]] +// SIMD-ONLY0: if.then5577: +// SIMD-ONLY0-NEXT: [[TMP4203:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4203]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5578]] +// SIMD-ONLY0: if.end5578: +// SIMD-ONLY0-NEXT: [[TMP4204:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4205:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5579:%.*]] = icmp eq i64 [[TMP4204]], [[TMP4205]] +// SIMD-ONLY0-NEXT: [[CONV5580:%.*]] = zext i1 [[CMP5579]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5581:%.*]] = sext i32 [[CONV5580]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5581]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4206:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5582:%.*]] = icmp ne i64 [[TMP4206]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5582]], 
label [[IF_THEN5583:%.*]], label [[IF_END5584:%.*]] +// SIMD-ONLY0: if.then5583: +// SIMD-ONLY0-NEXT: [[TMP4207:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4207]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5584]] +// SIMD-ONLY0: if.end5584: +// SIMD-ONLY0-NEXT: [[TMP4208:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4209:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5585:%.*]] = icmp eq i64 [[TMP4208]], [[TMP4209]] +// SIMD-ONLY0-NEXT: [[CONV5586:%.*]] = zext i1 [[CMP5585]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5587:%.*]] = sext i32 [[CONV5586]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5587]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4210:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5588:%.*]] = icmp ne i64 [[TMP4210]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5588]], label [[IF_THEN5589:%.*]], label [[IF_ELSE5590:%.*]] +// SIMD-ONLY0: if.then5589: +// SIMD-ONLY0-NEXT: [[TMP4211:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4211]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5591:%.*]] +// SIMD-ONLY0: if.else5590: +// SIMD-ONLY0-NEXT: [[TMP4212:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4212]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5591]] +// SIMD-ONLY0: if.end5591: +// SIMD-ONLY0-NEXT: [[TMP4213:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4214:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5592:%.*]] = icmp eq i64 [[TMP4213]], [[TMP4214]] +// SIMD-ONLY0-NEXT: [[CONV5593:%.*]] = zext i1 [[CMP5592]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5594:%.*]] = sext i32 [[CONV5593]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5594]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4215:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5595:%.*]] = icmp ne i64 [[TMP4215]], 0 +// SIMD-ONLY0-NEXT: br i1 
[[TOBOOL5595]], label [[IF_THEN5596:%.*]], label [[IF_ELSE5597:%.*]] +// SIMD-ONLY0: if.then5596: +// SIMD-ONLY0-NEXT: [[TMP4216:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4216]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5598:%.*]] +// SIMD-ONLY0: if.else5597: +// SIMD-ONLY0-NEXT: [[TMP4217:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4217]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5598]] +// SIMD-ONLY0: if.end5598: +// SIMD-ONLY0-NEXT: [[TMP4218:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4218]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4219:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4220:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5599:%.*]] = icmp ugt i64 [[TMP4219]], [[TMP4220]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5599]], label [[IF_THEN5601:%.*]], label [[IF_END5602:%.*]] +// SIMD-ONLY0: if.then5601: +// SIMD-ONLY0-NEXT: [[TMP4221:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4221]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5602]] +// SIMD-ONLY0: if.end5602: +// SIMD-ONLY0-NEXT: [[TMP4222:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4222]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4223:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4224:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5603:%.*]] = icmp ugt i64 [[TMP4223]], [[TMP4224]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5603]], label [[IF_THEN5605:%.*]], label [[IF_END5606:%.*]] +// SIMD-ONLY0: if.then5605: +// SIMD-ONLY0-NEXT: [[TMP4225:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4225]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5606]] +// SIMD-ONLY0: if.end5606: +// SIMD-ONLY0-NEXT: [[TMP4226:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP4226]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4227:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4228:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5607:%.*]] = icmp ult i64 [[TMP4227]], [[TMP4228]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5607]], label [[IF_THEN5609:%.*]], label [[IF_END5610:%.*]] +// SIMD-ONLY0: if.then5609: +// SIMD-ONLY0-NEXT: [[TMP4229:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4229]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5610]] +// SIMD-ONLY0: if.end5610: +// SIMD-ONLY0-NEXT: [[TMP4230:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4230]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4231:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4232:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5611:%.*]] = icmp ult i64 [[TMP4231]], [[TMP4232]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5611]], label [[IF_THEN5613:%.*]], label [[IF_END5614:%.*]] +// SIMD-ONLY0: if.then5613: +// SIMD-ONLY0-NEXT: [[TMP4233:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4233]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5614]] +// SIMD-ONLY0: if.end5614: +// SIMD-ONLY0-NEXT: [[TMP4234:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4234]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4235:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4236:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5615:%.*]] = icmp eq i64 [[TMP4235]], [[TMP4236]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5615]], label [[IF_THEN5617:%.*]], label [[IF_END5618:%.*]] +// SIMD-ONLY0: if.then5617: +// SIMD-ONLY0-NEXT: [[TMP4237:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4237]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5618]] +// SIMD-ONLY0: if.end5618: +// SIMD-ONLY0-NEXT: [[TMP4238:%.*]] = 
load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4238]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4239:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4240:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5619:%.*]] = icmp eq i64 [[TMP4239]], [[TMP4240]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5619]], label [[IF_THEN5621:%.*]], label [[IF_END5622:%.*]] +// SIMD-ONLY0: if.then5621: +// SIMD-ONLY0-NEXT: [[TMP4241:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4241]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5622]] +// SIMD-ONLY0: if.end5622: +// SIMD-ONLY0-NEXT: [[TMP4242:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4243:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5623:%.*]] = icmp ugt i64 [[TMP4242]], [[TMP4243]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5623]], label [[IF_THEN5625:%.*]], label [[IF_END5626:%.*]] +// SIMD-ONLY0: if.then5625: +// SIMD-ONLY0-NEXT: [[TMP4244:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4244]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5626]] +// SIMD-ONLY0: if.end5626: +// SIMD-ONLY0-NEXT: [[TMP4245:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4245]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4246:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4247:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5627:%.*]] = icmp ugt i64 [[TMP4246]], [[TMP4247]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5627]], label [[IF_THEN5629:%.*]], label [[IF_END5630:%.*]] +// SIMD-ONLY0: if.then5629: +// SIMD-ONLY0-NEXT: [[TMP4248:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4248]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5630]] +// SIMD-ONLY0: if.end5630: +// SIMD-ONLY0-NEXT: [[TMP4249:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4249]], 
ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4250:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4251:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5631:%.*]] = icmp ult i64 [[TMP4250]], [[TMP4251]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5631]], label [[IF_THEN5633:%.*]], label [[IF_END5634:%.*]] +// SIMD-ONLY0: if.then5633: +// SIMD-ONLY0-NEXT: [[TMP4252:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4252]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5634]] +// SIMD-ONLY0: if.end5634: +// SIMD-ONLY0-NEXT: [[TMP4253:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4253]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4254:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4255:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5635:%.*]] = icmp ult i64 [[TMP4254]], [[TMP4255]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5635]], label [[IF_THEN5637:%.*]], label [[IF_END5638:%.*]] +// SIMD-ONLY0: if.then5637: +// SIMD-ONLY0-NEXT: [[TMP4256:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4256]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5638]] +// SIMD-ONLY0: if.end5638: +// SIMD-ONLY0-NEXT: [[TMP4257:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4257]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4258:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4259:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5639:%.*]] = icmp eq i64 [[TMP4258]], [[TMP4259]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5639]], label [[IF_THEN5641:%.*]], label [[IF_END5642:%.*]] +// SIMD-ONLY0: if.then5641: +// SIMD-ONLY0-NEXT: [[TMP4260:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4260]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5642]] +// SIMD-ONLY0: if.end5642: +// SIMD-ONLY0-NEXT: [[TMP4261:%.*]] = load i64, ptr 
[[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4261]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4262:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4263:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5643:%.*]] = icmp eq i64 [[TMP4262]], [[TMP4263]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5643]], label [[IF_THEN5645:%.*]], label [[IF_END5646:%.*]] +// SIMD-ONLY0: if.then5645: +// SIMD-ONLY0-NEXT: [[TMP4264:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4264]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5646]] +// SIMD-ONLY0: if.end5646: +// SIMD-ONLY0-NEXT: [[TMP4265:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4265]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4266:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4267:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5647:%.*]] = icmp eq i64 [[TMP4266]], [[TMP4267]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5647]], label [[IF_THEN5649:%.*]], label [[IF_ELSE5650:%.*]] +// SIMD-ONLY0: if.then5649: +// SIMD-ONLY0-NEXT: [[TMP4268:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4268]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5651:%.*]] +// SIMD-ONLY0: if.else5650: +// SIMD-ONLY0-NEXT: [[TMP4269:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4269]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5651]] +// SIMD-ONLY0: if.end5651: +// SIMD-ONLY0-NEXT: [[TMP4270:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4271:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5652:%.*]] = icmp eq i64 [[TMP4270]], [[TMP4271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5652]], label [[IF_THEN5654:%.*]], label [[IF_ELSE5655:%.*]] +// SIMD-ONLY0: if.then5654: +// SIMD-ONLY0-NEXT: [[TMP4272:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4272]], ptr 
[[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5656:%.*]] +// SIMD-ONLY0: if.else5655: +// SIMD-ONLY0-NEXT: [[TMP4273:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4273]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5656]] +// SIMD-ONLY0: if.end5656: +// SIMD-ONLY0-NEXT: [[TMP4274:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4275:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5657:%.*]] = icmp eq i64 [[TMP4274]], [[TMP4275]] +// SIMD-ONLY0-NEXT: [[CONV5658:%.*]] = zext i1 [[CMP5657]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5659:%.*]] = sext i32 [[CONV5658]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5659]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4276:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5660:%.*]] = icmp ne i64 [[TMP4276]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5660]], label [[IF_THEN5661:%.*]], label [[IF_END5662:%.*]] +// SIMD-ONLY0: if.then5661: +// SIMD-ONLY0-NEXT: [[TMP4277:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4277]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5662]] +// SIMD-ONLY0: if.end5662: +// SIMD-ONLY0-NEXT: [[TMP4278:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4279:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5663:%.*]] = icmp eq i64 [[TMP4278]], [[TMP4279]] +// SIMD-ONLY0-NEXT: [[CONV5664:%.*]] = zext i1 [[CMP5663]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5665:%.*]] = sext i32 [[CONV5664]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5665]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4280:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5666:%.*]] = icmp ne i64 [[TMP4280]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5666]], label [[IF_THEN5667:%.*]], label [[IF_END5668:%.*]] +// SIMD-ONLY0: if.then5667: +// SIMD-ONLY0-NEXT: [[TMP4281:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP4281]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5668]] +// SIMD-ONLY0: if.end5668: +// SIMD-ONLY0-NEXT: [[TMP4282:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4283:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5669:%.*]] = icmp eq i64 [[TMP4282]], [[TMP4283]] +// SIMD-ONLY0-NEXT: [[CONV5670:%.*]] = zext i1 [[CMP5669]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5671:%.*]] = sext i32 [[CONV5670]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5671]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4284:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5672:%.*]] = icmp ne i64 [[TMP4284]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5672]], label [[IF_THEN5673:%.*]], label [[IF_ELSE5674:%.*]] +// SIMD-ONLY0: if.then5673: +// SIMD-ONLY0-NEXT: [[TMP4285:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4285]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5675:%.*]] +// SIMD-ONLY0: if.else5674: +// SIMD-ONLY0-NEXT: [[TMP4286:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4286]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5675]] +// SIMD-ONLY0: if.end5675: +// SIMD-ONLY0-NEXT: [[TMP4287:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4288:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5676:%.*]] = icmp eq i64 [[TMP4287]], [[TMP4288]] +// SIMD-ONLY0-NEXT: [[CONV5677:%.*]] = zext i1 [[CMP5676]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5678:%.*]] = sext i32 [[CONV5677]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5678]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4289:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5679:%.*]] = icmp ne i64 [[TMP4289]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5679]], label [[IF_THEN5680:%.*]], label [[IF_ELSE5681:%.*]] +// SIMD-ONLY0: if.then5680: +// SIMD-ONLY0-NEXT: [[TMP4290:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: 
store i64 [[TMP4290]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5682:%.*]] +// SIMD-ONLY0: if.else5681: +// SIMD-ONLY0-NEXT: [[TMP4291:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4291]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5682]] +// SIMD-ONLY0: if.end5682: +// SIMD-ONLY0-NEXT: [[TMP4292:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4292]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4293:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4294:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5683:%.*]] = icmp ugt i64 [[TMP4293]], [[TMP4294]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5683]], label [[IF_THEN5685:%.*]], label [[IF_END5686:%.*]] +// SIMD-ONLY0: if.then5685: +// SIMD-ONLY0-NEXT: [[TMP4295:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4295]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5686]] +// SIMD-ONLY0: if.end5686: +// SIMD-ONLY0-NEXT: [[TMP4296:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4296]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4297:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4298:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5687:%.*]] = icmp ugt i64 [[TMP4297]], [[TMP4298]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5687]], label [[IF_THEN5689:%.*]], label [[IF_END5690:%.*]] +// SIMD-ONLY0: if.then5689: +// SIMD-ONLY0-NEXT: [[TMP4299:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4299]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5690]] +// SIMD-ONLY0: if.end5690: +// SIMD-ONLY0-NEXT: [[TMP4300:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4300]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4301:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4302:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP5691:%.*]] = icmp ult i64 [[TMP4301]], [[TMP4302]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5691]], label [[IF_THEN5693:%.*]], label [[IF_END5694:%.*]] +// SIMD-ONLY0: if.then5693: +// SIMD-ONLY0-NEXT: [[TMP4303:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4303]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5694]] +// SIMD-ONLY0: if.end5694: +// SIMD-ONLY0-NEXT: [[TMP4304:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4304]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4305:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4306:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5695:%.*]] = icmp ult i64 [[TMP4305]], [[TMP4306]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5695]], label [[IF_THEN5697:%.*]], label [[IF_END5698:%.*]] +// SIMD-ONLY0: if.then5697: +// SIMD-ONLY0-NEXT: [[TMP4307:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4307]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5698]] +// SIMD-ONLY0: if.end5698: +// SIMD-ONLY0-NEXT: [[TMP4308:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4308]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4309:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4310:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5699:%.*]] = icmp eq i64 [[TMP4309]], [[TMP4310]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5699]], label [[IF_THEN5701:%.*]], label [[IF_END5702:%.*]] +// SIMD-ONLY0: if.then5701: +// SIMD-ONLY0-NEXT: [[TMP4311:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4311]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5702]] +// SIMD-ONLY0: if.end5702: +// SIMD-ONLY0-NEXT: [[TMP4312:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4312]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4313:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP4314:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5703:%.*]] = icmp eq i64 [[TMP4313]], [[TMP4314]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5703]], label [[IF_THEN5705:%.*]], label [[IF_END5706:%.*]] +// SIMD-ONLY0: if.then5705: +// SIMD-ONLY0-NEXT: [[TMP4315:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4315]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5706]] +// SIMD-ONLY0: if.end5706: +// SIMD-ONLY0-NEXT: [[TMP4316:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4317:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5707:%.*]] = icmp ugt i64 [[TMP4316]], [[TMP4317]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5707]], label [[IF_THEN5709:%.*]], label [[IF_END5710:%.*]] +// SIMD-ONLY0: if.then5709: +// SIMD-ONLY0-NEXT: [[TMP4318:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4318]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5710]] +// SIMD-ONLY0: if.end5710: +// SIMD-ONLY0-NEXT: [[TMP4319:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4319]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4320:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4321:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5711:%.*]] = icmp ugt i64 [[TMP4320]], [[TMP4321]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5711]], label [[IF_THEN5713:%.*]], label [[IF_END5714:%.*]] +// SIMD-ONLY0: if.then5713: +// SIMD-ONLY0-NEXT: [[TMP4322:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4322]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5714]] +// SIMD-ONLY0: if.end5714: +// SIMD-ONLY0-NEXT: [[TMP4323:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4323]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4324:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4325:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP5715:%.*]] = icmp ult i64 [[TMP4324]], [[TMP4325]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5715]], label [[IF_THEN5717:%.*]], label [[IF_END5718:%.*]] +// SIMD-ONLY0: if.then5717: +// SIMD-ONLY0-NEXT: [[TMP4326:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4326]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5718]] +// SIMD-ONLY0: if.end5718: +// SIMD-ONLY0-NEXT: [[TMP4327:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4327]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4328:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4329:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5719:%.*]] = icmp ult i64 [[TMP4328]], [[TMP4329]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5719]], label [[IF_THEN5721:%.*]], label [[IF_END5722:%.*]] +// SIMD-ONLY0: if.then5721: +// SIMD-ONLY0-NEXT: [[TMP4330:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4330]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5722]] +// SIMD-ONLY0: if.end5722: +// SIMD-ONLY0-NEXT: [[TMP4331:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4331]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4332:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4333:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5723:%.*]] = icmp eq i64 [[TMP4332]], [[TMP4333]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5723]], label [[IF_THEN5725:%.*]], label [[IF_END5726:%.*]] +// SIMD-ONLY0: if.then5725: +// SIMD-ONLY0-NEXT: [[TMP4334:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4334]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5726]] +// SIMD-ONLY0: if.end5726: +// SIMD-ONLY0-NEXT: [[TMP4335:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4335]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4336:%.*]] = load i64, ptr [[ULLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP4337:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5727:%.*]] = icmp eq i64 [[TMP4336]], [[TMP4337]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5727]], label [[IF_THEN5729:%.*]], label [[IF_END5730:%.*]] +// SIMD-ONLY0: if.then5729: +// SIMD-ONLY0-NEXT: [[TMP4338:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4338]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5730]] +// SIMD-ONLY0: if.end5730: +// SIMD-ONLY0-NEXT: [[TMP4339:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4339]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4340:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4341:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5731:%.*]] = icmp eq i64 [[TMP4340]], [[TMP4341]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5731]], label [[IF_THEN5733:%.*]], label [[IF_ELSE5734:%.*]] +// SIMD-ONLY0: if.then5733: +// SIMD-ONLY0-NEXT: [[TMP4342:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4342]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5735:%.*]] +// SIMD-ONLY0: if.else5734: +// SIMD-ONLY0-NEXT: [[TMP4343:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4343]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5735]] +// SIMD-ONLY0: if.end5735: +// SIMD-ONLY0-NEXT: [[TMP4344:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4345:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5736:%.*]] = icmp eq i64 [[TMP4344]], [[TMP4345]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5736]], label [[IF_THEN5738:%.*]], label [[IF_ELSE5739:%.*]] +// SIMD-ONLY0: if.then5738: +// SIMD-ONLY0-NEXT: [[TMP4346:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4346]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5740:%.*]] +// SIMD-ONLY0: if.else5739: +// SIMD-ONLY0-NEXT: [[TMP4347:%.*]] = load i64, ptr [[ULLX]], align 
8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4347]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5740]] +// SIMD-ONLY0: if.end5740: +// SIMD-ONLY0-NEXT: [[TMP4348:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4349:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5741:%.*]] = icmp eq i64 [[TMP4348]], [[TMP4349]] +// SIMD-ONLY0-NEXT: [[CONV5742:%.*]] = zext i1 [[CMP5741]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5743:%.*]] = sext i32 [[CONV5742]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5743]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4350:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5744:%.*]] = icmp ne i64 [[TMP4350]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5744]], label [[IF_THEN5745:%.*]], label [[IF_END5746:%.*]] +// SIMD-ONLY0: if.then5745: +// SIMD-ONLY0-NEXT: [[TMP4351:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4351]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5746]] +// SIMD-ONLY0: if.end5746: +// SIMD-ONLY0-NEXT: [[TMP4352:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4353:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5747:%.*]] = icmp eq i64 [[TMP4352]], [[TMP4353]] +// SIMD-ONLY0-NEXT: [[CONV5748:%.*]] = zext i1 [[CMP5747]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5749:%.*]] = sext i32 [[CONV5748]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5749]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4354:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5750:%.*]] = icmp ne i64 [[TMP4354]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5750]], label [[IF_THEN5751:%.*]], label [[IF_END5752:%.*]] +// SIMD-ONLY0: if.then5751: +// SIMD-ONLY0-NEXT: [[TMP4355:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4355]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5752]] +// SIMD-ONLY0: if.end5752: +// SIMD-ONLY0-NEXT: [[TMP4356:%.*]] = load i64, ptr [[ULLX]], align 
8 +// SIMD-ONLY0-NEXT: [[TMP4357:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5753:%.*]] = icmp eq i64 [[TMP4356]], [[TMP4357]] +// SIMD-ONLY0-NEXT: [[CONV5754:%.*]] = zext i1 [[CMP5753]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5755:%.*]] = sext i32 [[CONV5754]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5755]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4358:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5756:%.*]] = icmp ne i64 [[TMP4358]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5756]], label [[IF_THEN5757:%.*]], label [[IF_ELSE5758:%.*]] +// SIMD-ONLY0: if.then5757: +// SIMD-ONLY0-NEXT: [[TMP4359:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4359]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5759:%.*]] +// SIMD-ONLY0: if.else5758: +// SIMD-ONLY0-NEXT: [[TMP4360:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4360]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5759]] +// SIMD-ONLY0: if.end5759: +// SIMD-ONLY0-NEXT: [[TMP4361:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4362:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5760:%.*]] = icmp eq i64 [[TMP4361]], [[TMP4362]] +// SIMD-ONLY0-NEXT: [[CONV5761:%.*]] = zext i1 [[CMP5760]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5762:%.*]] = sext i32 [[CONV5761]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5762]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4363:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5763:%.*]] = icmp ne i64 [[TMP4363]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5763]], label [[IF_THEN5764:%.*]], label [[IF_ELSE5765:%.*]] +// SIMD-ONLY0: if.then5764: +// SIMD-ONLY0-NEXT: [[TMP4364:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4364]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5766:%.*]] +// SIMD-ONLY0: if.else5765: +// SIMD-ONLY0-NEXT: [[TMP4365:%.*]] = load i64, ptr 
[[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4365]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5766]] +// SIMD-ONLY0: if.end5766: +// SIMD-ONLY0-NEXT: [[TMP4366:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4366]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4367:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4368:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5767:%.*]] = icmp ugt i64 [[TMP4367]], [[TMP4368]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5767]], label [[IF_THEN5769:%.*]], label [[IF_END5770:%.*]] +// SIMD-ONLY0: if.then5769: +// SIMD-ONLY0-NEXT: [[TMP4369:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4369]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5770]] +// SIMD-ONLY0: if.end5770: +// SIMD-ONLY0-NEXT: [[TMP4370:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4370]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4371:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4372:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5771:%.*]] = icmp ugt i64 [[TMP4371]], [[TMP4372]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5771]], label [[IF_THEN5773:%.*]], label [[IF_END5774:%.*]] +// SIMD-ONLY0: if.then5773: +// SIMD-ONLY0-NEXT: [[TMP4373:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4373]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5774]] +// SIMD-ONLY0: if.end5774: +// SIMD-ONLY0-NEXT: [[TMP4374:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4374]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4375:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4376:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5775:%.*]] = icmp ult i64 [[TMP4375]], [[TMP4376]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5775]], label [[IF_THEN5777:%.*]], label [[IF_END5778:%.*]] +// SIMD-ONLY0: 
if.then5777: +// SIMD-ONLY0-NEXT: [[TMP4377:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4377]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5778]] +// SIMD-ONLY0: if.end5778: +// SIMD-ONLY0-NEXT: [[TMP4378:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4378]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4379:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4380:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5779:%.*]] = icmp ult i64 [[TMP4379]], [[TMP4380]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5779]], label [[IF_THEN5781:%.*]], label [[IF_END5782:%.*]] +// SIMD-ONLY0: if.then5781: +// SIMD-ONLY0-NEXT: [[TMP4381:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4381]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5782]] +// SIMD-ONLY0: if.end5782: +// SIMD-ONLY0-NEXT: [[TMP4382:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4382]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4383:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4384:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5783:%.*]] = icmp eq i64 [[TMP4383]], [[TMP4384]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5783]], label [[IF_THEN5785:%.*]], label [[IF_END5786:%.*]] +// SIMD-ONLY0: if.then5785: +// SIMD-ONLY0-NEXT: [[TMP4385:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4385]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5786]] +// SIMD-ONLY0: if.end5786: +// SIMD-ONLY0-NEXT: [[TMP4386:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4386]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4387:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4388:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5787:%.*]] = icmp eq i64 [[TMP4387]], [[TMP4388]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5787]], label 
[[IF_THEN5789:%.*]], label [[IF_END5790:%.*]] +// SIMD-ONLY0: if.then5789: +// SIMD-ONLY0-NEXT: [[TMP4389:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4389]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5790]] +// SIMD-ONLY0: if.end5790: +// SIMD-ONLY0-NEXT: [[TMP4390:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4391:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5791:%.*]] = icmp ugt i64 [[TMP4390]], [[TMP4391]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5791]], label [[IF_THEN5793:%.*]], label [[IF_END5794:%.*]] +// SIMD-ONLY0: if.then5793: +// SIMD-ONLY0-NEXT: [[TMP4392:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4392]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5794]] +// SIMD-ONLY0: if.end5794: +// SIMD-ONLY0-NEXT: [[TMP4393:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4393]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4394:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4395:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5795:%.*]] = icmp ugt i64 [[TMP4394]], [[TMP4395]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5795]], label [[IF_THEN5797:%.*]], label [[IF_END5798:%.*]] +// SIMD-ONLY0: if.then5797: +// SIMD-ONLY0-NEXT: [[TMP4396:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4396]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5798]] +// SIMD-ONLY0: if.end5798: +// SIMD-ONLY0-NEXT: [[TMP4397:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4397]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4398:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4399:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5799:%.*]] = icmp ult i64 [[TMP4398]], [[TMP4399]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5799]], label [[IF_THEN5801:%.*]], label [[IF_END5802:%.*]] +// SIMD-ONLY0: if.then5801: 
+// SIMD-ONLY0-NEXT: [[TMP4400:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4400]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5802]] +// SIMD-ONLY0: if.end5802: +// SIMD-ONLY0-NEXT: [[TMP4401:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4401]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4402:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4403:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5803:%.*]] = icmp ult i64 [[TMP4402]], [[TMP4403]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5803]], label [[IF_THEN5805:%.*]], label [[IF_END5806:%.*]] +// SIMD-ONLY0: if.then5805: +// SIMD-ONLY0-NEXT: [[TMP4404:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4404]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5806]] +// SIMD-ONLY0: if.end5806: +// SIMD-ONLY0-NEXT: [[TMP4405:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4405]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4406:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4407:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5807:%.*]] = icmp eq i64 [[TMP4406]], [[TMP4407]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5807]], label [[IF_THEN5809:%.*]], label [[IF_END5810:%.*]] +// SIMD-ONLY0: if.then5809: +// SIMD-ONLY0-NEXT: [[TMP4408:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4408]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5810]] +// SIMD-ONLY0: if.end5810: +// SIMD-ONLY0-NEXT: [[TMP4409:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4409]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4410:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4411:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5811:%.*]] = icmp eq i64 [[TMP4410]], [[TMP4411]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5811]], label 
[[IF_THEN5813:%.*]], label [[IF_END5814:%.*]] +// SIMD-ONLY0: if.then5813: +// SIMD-ONLY0-NEXT: [[TMP4412:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4412]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5814]] +// SIMD-ONLY0: if.end5814: +// SIMD-ONLY0-NEXT: [[TMP4413:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4413]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4414:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4415:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5815:%.*]] = icmp eq i64 [[TMP4414]], [[TMP4415]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5815]], label [[IF_THEN5817:%.*]], label [[IF_ELSE5818:%.*]] +// SIMD-ONLY0: if.then5817: +// SIMD-ONLY0-NEXT: [[TMP4416:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4416]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5819:%.*]] +// SIMD-ONLY0: if.else5818: +// SIMD-ONLY0-NEXT: [[TMP4417:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4417]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5819]] +// SIMD-ONLY0: if.end5819: +// SIMD-ONLY0-NEXT: [[TMP4418:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4419:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5820:%.*]] = icmp eq i64 [[TMP4418]], [[TMP4419]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5820]], label [[IF_THEN5822:%.*]], label [[IF_ELSE5823:%.*]] +// SIMD-ONLY0: if.then5822: +// SIMD-ONLY0-NEXT: [[TMP4420:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4420]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5824:%.*]] +// SIMD-ONLY0: if.else5823: +// SIMD-ONLY0-NEXT: [[TMP4421:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4421]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5824]] +// SIMD-ONLY0: if.end5824: +// SIMD-ONLY0-NEXT: [[TMP4422:%.*]] = load 
i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4423:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5825:%.*]] = icmp eq i64 [[TMP4422]], [[TMP4423]] +// SIMD-ONLY0-NEXT: [[CONV5826:%.*]] = zext i1 [[CMP5825]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5827:%.*]] = sext i32 [[CONV5826]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5827]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4424:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5828:%.*]] = icmp ne i64 [[TMP4424]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5828]], label [[IF_THEN5829:%.*]], label [[IF_END5830:%.*]] +// SIMD-ONLY0: if.then5829: +// SIMD-ONLY0-NEXT: [[TMP4425:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4425]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5830]] +// SIMD-ONLY0: if.end5830: +// SIMD-ONLY0-NEXT: [[TMP4426:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4427:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5831:%.*]] = icmp eq i64 [[TMP4426]], [[TMP4427]] +// SIMD-ONLY0-NEXT: [[CONV5832:%.*]] = zext i1 [[CMP5831]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5833:%.*]] = sext i32 [[CONV5832]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5833]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4428:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5834:%.*]] = icmp ne i64 [[TMP4428]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5834]], label [[IF_THEN5835:%.*]], label [[IF_END5836:%.*]] +// SIMD-ONLY0: if.then5835: +// SIMD-ONLY0-NEXT: [[TMP4429:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4429]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5836]] +// SIMD-ONLY0: if.end5836: +// SIMD-ONLY0-NEXT: [[TMP4430:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4431:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5837:%.*]] = icmp eq i64 [[TMP4430]], [[TMP4431]] +// SIMD-ONLY0-NEXT: 
[[CONV5838:%.*]] = zext i1 [[CMP5837]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5839:%.*]] = sext i32 [[CONV5838]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5839]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4432:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5840:%.*]] = icmp ne i64 [[TMP4432]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5840]], label [[IF_THEN5841:%.*]], label [[IF_ELSE5842:%.*]] +// SIMD-ONLY0: if.then5841: +// SIMD-ONLY0-NEXT: [[TMP4433:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4433]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5843:%.*]] +// SIMD-ONLY0: if.else5842: +// SIMD-ONLY0-NEXT: [[TMP4434:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4434]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5843]] +// SIMD-ONLY0: if.end5843: +// SIMD-ONLY0-NEXT: [[TMP4435:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4436:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP5844:%.*]] = icmp eq i64 [[TMP4435]], [[TMP4436]] +// SIMD-ONLY0-NEXT: [[CONV5845:%.*]] = zext i1 [[CMP5844]] to i32 +// SIMD-ONLY0-NEXT: [[CONV5846:%.*]] = sext i32 [[CONV5845]] to i64 +// SIMD-ONLY0-NEXT: store i64 [[CONV5846]], ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4437:%.*]] = load i64, ptr [[ULLR]], align 8 +// SIMD-ONLY0-NEXT: [[TOBOOL5847:%.*]] = icmp ne i64 [[TMP4437]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5847]], label [[IF_THEN5848:%.*]], label [[IF_ELSE5849:%.*]] +// SIMD-ONLY0: if.then5848: +// SIMD-ONLY0-NEXT: [[TMP4438:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4438]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5850:%.*]] +// SIMD-ONLY0: if.else5849: +// SIMD-ONLY0-NEXT: [[TMP4439:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP4439]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END5850]] +// SIMD-ONLY0: if.end5850: +// 
SIMD-ONLY0-NEXT: [[TMP4440:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4440]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4441:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4442:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5851:%.*]] = fcmp ogt float [[TMP4441]], [[TMP4442]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5851]], label [[IF_THEN5853:%.*]], label [[IF_END5854:%.*]] +// SIMD-ONLY0: if.then5853: +// SIMD-ONLY0-NEXT: [[TMP4443:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4443]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5854]] +// SIMD-ONLY0: if.end5854: +// SIMD-ONLY0-NEXT: [[TMP4444:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4444]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4445:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4446:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5855:%.*]] = fcmp ogt float [[TMP4445]], [[TMP4446]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5855]], label [[IF_THEN5857:%.*]], label [[IF_END5858:%.*]] +// SIMD-ONLY0: if.then5857: +// SIMD-ONLY0-NEXT: [[TMP4447:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4447]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5858]] +// SIMD-ONLY0: if.end5858: +// SIMD-ONLY0-NEXT: [[TMP4448:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4448]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4449:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4450:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5859:%.*]] = fcmp olt float [[TMP4449]], [[TMP4450]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5859]], label [[IF_THEN5861:%.*]], label [[IF_END5862:%.*]] +// SIMD-ONLY0: if.then5861: +// SIMD-ONLY0-NEXT: [[TMP4451:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4451]], ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5862]] +// SIMD-ONLY0: if.end5862: +// SIMD-ONLY0-NEXT: [[TMP4452:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4452]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4453:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4454:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5863:%.*]] = fcmp olt float [[TMP4453]], [[TMP4454]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5863]], label [[IF_THEN5865:%.*]], label [[IF_END5866:%.*]] +// SIMD-ONLY0: if.then5865: +// SIMD-ONLY0-NEXT: [[TMP4455:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4455]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5866]] +// SIMD-ONLY0: if.end5866: +// SIMD-ONLY0-NEXT: [[TMP4456:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4456]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4457:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4458:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5867:%.*]] = fcmp oeq float [[TMP4457]], [[TMP4458]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5867]], label [[IF_THEN5869:%.*]], label [[IF_END5870:%.*]] +// SIMD-ONLY0: if.then5869: +// SIMD-ONLY0-NEXT: [[TMP4459:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4459]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5870]] +// SIMD-ONLY0: if.end5870: +// SIMD-ONLY0-NEXT: [[TMP4460:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4460]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4461:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4462:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5871:%.*]] = fcmp oeq float [[TMP4461]], [[TMP4462]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5871]], label [[IF_THEN5873:%.*]], label [[IF_END5874:%.*]] +// SIMD-ONLY0: if.then5873: +// SIMD-ONLY0-NEXT: [[TMP4463:%.*]] = load float, ptr [[FD]], align 4 
+// SIMD-ONLY0-NEXT: store float [[TMP4463]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5874]] +// SIMD-ONLY0: if.end5874: +// SIMD-ONLY0-NEXT: [[TMP4464:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4465:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5875:%.*]] = fcmp ogt float [[TMP4464]], [[TMP4465]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5875]], label [[IF_THEN5877:%.*]], label [[IF_END5878:%.*]] +// SIMD-ONLY0: if.then5877: +// SIMD-ONLY0-NEXT: [[TMP4466:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4466]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5878]] +// SIMD-ONLY0: if.end5878: +// SIMD-ONLY0-NEXT: [[TMP4467:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4467]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4468:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4469:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5879:%.*]] = fcmp ogt float [[TMP4468]], [[TMP4469]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5879]], label [[IF_THEN5881:%.*]], label [[IF_END5882:%.*]] +// SIMD-ONLY0: if.then5881: +// SIMD-ONLY0-NEXT: [[TMP4470:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4470]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5882]] +// SIMD-ONLY0: if.end5882: +// SIMD-ONLY0-NEXT: [[TMP4471:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4471]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4472:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4473:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5883:%.*]] = fcmp olt float [[TMP4472]], [[TMP4473]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5883]], label [[IF_THEN5885:%.*]], label [[IF_END5886:%.*]] +// SIMD-ONLY0: if.then5885: +// SIMD-ONLY0-NEXT: [[TMP4474:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4474]], ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END5886]] +// SIMD-ONLY0: if.end5886: +// SIMD-ONLY0-NEXT: [[TMP4475:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4475]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4476:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4477:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5887:%.*]] = fcmp olt float [[TMP4476]], [[TMP4477]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5887]], label [[IF_THEN5889:%.*]], label [[IF_END5890:%.*]] +// SIMD-ONLY0: if.then5889: +// SIMD-ONLY0-NEXT: [[TMP4478:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4478]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5890]] +// SIMD-ONLY0: if.end5890: +// SIMD-ONLY0-NEXT: [[TMP4479:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4479]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4480:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4481:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5891:%.*]] = fcmp oeq float [[TMP4480]], [[TMP4481]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5891]], label [[IF_THEN5893:%.*]], label [[IF_END5894:%.*]] +// SIMD-ONLY0: if.then5893: +// SIMD-ONLY0-NEXT: [[TMP4482:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4482]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5894]] +// SIMD-ONLY0: if.end5894: +// SIMD-ONLY0-NEXT: [[TMP4483:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4483]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4484:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4485:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5895:%.*]] = fcmp oeq float [[TMP4484]], [[TMP4485]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5895]], label [[IF_THEN5897:%.*]], label [[IF_END5898:%.*]] +// SIMD-ONLY0: if.then5897: +// SIMD-ONLY0-NEXT: [[TMP4486:%.*]] = load float, ptr [[FD]], align 4 
+// SIMD-ONLY0-NEXT: store float [[TMP4486]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5898]] +// SIMD-ONLY0: if.end5898: +// SIMD-ONLY0-NEXT: [[TMP4487:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4487]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4488:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4489:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5899:%.*]] = fcmp oeq float [[TMP4488]], [[TMP4489]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5899]], label [[IF_THEN5901:%.*]], label [[IF_ELSE5902:%.*]] +// SIMD-ONLY0: if.then5901: +// SIMD-ONLY0-NEXT: [[TMP4490:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4490]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5903:%.*]] +// SIMD-ONLY0: if.else5902: +// SIMD-ONLY0-NEXT: [[TMP4491:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4491]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5903]] +// SIMD-ONLY0: if.end5903: +// SIMD-ONLY0-NEXT: [[TMP4492:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4493:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5904:%.*]] = fcmp oeq float [[TMP4492]], [[TMP4493]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5904]], label [[IF_THEN5906:%.*]], label [[IF_ELSE5907:%.*]] +// SIMD-ONLY0: if.then5906: +// SIMD-ONLY0-NEXT: [[TMP4494:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4494]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5908:%.*]] +// SIMD-ONLY0: if.else5907: +// SIMD-ONLY0-NEXT: [[TMP4495:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4495]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5908]] +// SIMD-ONLY0: if.end5908: +// SIMD-ONLY0-NEXT: [[TMP4496:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4497:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5909:%.*]] = fcmp 
oeq float [[TMP4496]], [[TMP4497]] +// SIMD-ONLY0-NEXT: [[CONV5910:%.*]] = zext i1 [[CMP5909]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5910]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4498:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5911:%.*]] = icmp ne i32 [[TMP4498]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5911]], label [[IF_THEN5912:%.*]], label [[IF_END5913:%.*]] +// SIMD-ONLY0: if.then5912: +// SIMD-ONLY0-NEXT: [[TMP4499:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4499]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5913]] +// SIMD-ONLY0: if.end5913: +// SIMD-ONLY0-NEXT: [[TMP4500:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4501:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5914:%.*]] = fcmp oeq float [[TMP4500]], [[TMP4501]] +// SIMD-ONLY0-NEXT: [[CONV5915:%.*]] = zext i1 [[CMP5914]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5915]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4502:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5916:%.*]] = icmp ne i32 [[TMP4502]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5916]], label [[IF_THEN5917:%.*]], label [[IF_END5918:%.*]] +// SIMD-ONLY0: if.then5917: +// SIMD-ONLY0-NEXT: [[TMP4503:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4503]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5918]] +// SIMD-ONLY0: if.end5918: +// SIMD-ONLY0-NEXT: [[TMP4504:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4505:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5919:%.*]] = fcmp oeq float [[TMP4504]], [[TMP4505]] +// SIMD-ONLY0-NEXT: [[CONV5920:%.*]] = zext i1 [[CMP5919]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5920]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4506:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5921:%.*]] = icmp ne i32 [[TMP4506]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5921]], 
label [[IF_THEN5922:%.*]], label [[IF_ELSE5923:%.*]] +// SIMD-ONLY0: if.then5922: +// SIMD-ONLY0-NEXT: [[TMP4507:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4507]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5924:%.*]] +// SIMD-ONLY0: if.else5923: +// SIMD-ONLY0-NEXT: [[TMP4508:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4508]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5924]] +// SIMD-ONLY0: if.end5924: +// SIMD-ONLY0-NEXT: [[TMP4509:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4510:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5925:%.*]] = fcmp oeq float [[TMP4509]], [[TMP4510]] +// SIMD-ONLY0-NEXT: [[CONV5926:%.*]] = zext i1 [[CMP5925]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5926]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4511:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5927:%.*]] = icmp ne i32 [[TMP4511]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5927]], label [[IF_THEN5928:%.*]], label [[IF_ELSE5929:%.*]] +// SIMD-ONLY0: if.then5928: +// SIMD-ONLY0-NEXT: [[TMP4512:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4512]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5930:%.*]] +// SIMD-ONLY0: if.else5929: +// SIMD-ONLY0-NEXT: [[TMP4513:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4513]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5930]] +// SIMD-ONLY0: if.end5930: +// SIMD-ONLY0-NEXT: [[TMP4514:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4514]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4515:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4516:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5931:%.*]] = fcmp ogt float [[TMP4515]], [[TMP4516]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5931]], label [[IF_THEN5933:%.*]], label [[IF_END5934:%.*]] 
+// SIMD-ONLY0: if.then5933: +// SIMD-ONLY0-NEXT: [[TMP4517:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4517]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5934]] +// SIMD-ONLY0: if.end5934: +// SIMD-ONLY0-NEXT: [[TMP4518:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4518]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4519:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4520:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5935:%.*]] = fcmp ogt float [[TMP4519]], [[TMP4520]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5935]], label [[IF_THEN5937:%.*]], label [[IF_END5938:%.*]] +// SIMD-ONLY0: if.then5937: +// SIMD-ONLY0-NEXT: [[TMP4521:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4521]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5938]] +// SIMD-ONLY0: if.end5938: +// SIMD-ONLY0-NEXT: [[TMP4522:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4522]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4523:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4524:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5939:%.*]] = fcmp olt float [[TMP4523]], [[TMP4524]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5939]], label [[IF_THEN5941:%.*]], label [[IF_END5942:%.*]] +// SIMD-ONLY0: if.then5941: +// SIMD-ONLY0-NEXT: [[TMP4525:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4525]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5942]] +// SIMD-ONLY0: if.end5942: +// SIMD-ONLY0-NEXT: [[TMP4526:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4526]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4527:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4528:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5943:%.*]] = fcmp olt float [[TMP4527]], [[TMP4528]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP5943]], label [[IF_THEN5945:%.*]], label [[IF_END5946:%.*]] +// SIMD-ONLY0: if.then5945: +// SIMD-ONLY0-NEXT: [[TMP4529:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4529]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5946]] +// SIMD-ONLY0: if.end5946: +// SIMD-ONLY0-NEXT: [[TMP4530:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4530]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4531:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4532:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5947:%.*]] = fcmp oeq float [[TMP4531]], [[TMP4532]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5947]], label [[IF_THEN5949:%.*]], label [[IF_END5950:%.*]] +// SIMD-ONLY0: if.then5949: +// SIMD-ONLY0-NEXT: [[TMP4533:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4533]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5950]] +// SIMD-ONLY0: if.end5950: +// SIMD-ONLY0-NEXT: [[TMP4534:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4534]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4535:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4536:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5951:%.*]] = fcmp oeq float [[TMP4535]], [[TMP4536]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5951]], label [[IF_THEN5953:%.*]], label [[IF_END5954:%.*]] +// SIMD-ONLY0: if.then5953: +// SIMD-ONLY0-NEXT: [[TMP4537:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4537]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5954]] +// SIMD-ONLY0: if.end5954: +// SIMD-ONLY0-NEXT: [[TMP4538:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4539:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5955:%.*]] = fcmp ogt float [[TMP4538]], [[TMP4539]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5955]], label [[IF_THEN5957:%.*]], label [[IF_END5958:%.*]] 
+// SIMD-ONLY0: if.then5957: +// SIMD-ONLY0-NEXT: [[TMP4540:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4540]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5958]] +// SIMD-ONLY0: if.end5958: +// SIMD-ONLY0-NEXT: [[TMP4541:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4541]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4542:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4543:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5959:%.*]] = fcmp ogt float [[TMP4542]], [[TMP4543]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5959]], label [[IF_THEN5961:%.*]], label [[IF_END5962:%.*]] +// SIMD-ONLY0: if.then5961: +// SIMD-ONLY0-NEXT: [[TMP4544:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4544]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5962]] +// SIMD-ONLY0: if.end5962: +// SIMD-ONLY0-NEXT: [[TMP4545:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4545]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4546:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4547:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5963:%.*]] = fcmp olt float [[TMP4546]], [[TMP4547]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5963]], label [[IF_THEN5965:%.*]], label [[IF_END5966:%.*]] +// SIMD-ONLY0: if.then5965: +// SIMD-ONLY0-NEXT: [[TMP4548:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4548]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5966]] +// SIMD-ONLY0: if.end5966: +// SIMD-ONLY0-NEXT: [[TMP4549:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4549]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4550:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4551:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5967:%.*]] = fcmp olt float [[TMP4550]], [[TMP4551]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP5967]], label [[IF_THEN5969:%.*]], label [[IF_END5970:%.*]] +// SIMD-ONLY0: if.then5969: +// SIMD-ONLY0-NEXT: [[TMP4552:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4552]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5970]] +// SIMD-ONLY0: if.end5970: +// SIMD-ONLY0-NEXT: [[TMP4553:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4553]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4554:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4555:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5971:%.*]] = fcmp oeq float [[TMP4554]], [[TMP4555]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5971]], label [[IF_THEN5973:%.*]], label [[IF_END5974:%.*]] +// SIMD-ONLY0: if.then5973: +// SIMD-ONLY0-NEXT: [[TMP4556:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4556]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5974]] +// SIMD-ONLY0: if.end5974: +// SIMD-ONLY0-NEXT: [[TMP4557:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4557]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4558:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4559:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5975:%.*]] = fcmp oeq float [[TMP4558]], [[TMP4559]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5975]], label [[IF_THEN5977:%.*]], label [[IF_END5978:%.*]] +// SIMD-ONLY0: if.then5977: +// SIMD-ONLY0-NEXT: [[TMP4560:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4560]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5978]] +// SIMD-ONLY0: if.end5978: +// SIMD-ONLY0-NEXT: [[TMP4561:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4561]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4562:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4563:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP5979:%.*]] = fcmp oeq float [[TMP4562]], [[TMP4563]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5979]], label [[IF_THEN5981:%.*]], label [[IF_ELSE5982:%.*]] +// SIMD-ONLY0: if.then5981: +// SIMD-ONLY0-NEXT: [[TMP4564:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4564]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5983:%.*]] +// SIMD-ONLY0: if.else5982: +// SIMD-ONLY0-NEXT: [[TMP4565:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4565]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5983]] +// SIMD-ONLY0: if.end5983: +// SIMD-ONLY0-NEXT: [[TMP4566:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4567:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5984:%.*]] = fcmp oeq float [[TMP4566]], [[TMP4567]] +// SIMD-ONLY0-NEXT: br i1 [[CMP5984]], label [[IF_THEN5986:%.*]], label [[IF_ELSE5987:%.*]] +// SIMD-ONLY0: if.then5986: +// SIMD-ONLY0-NEXT: [[TMP4568:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4568]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5988:%.*]] +// SIMD-ONLY0: if.else5987: +// SIMD-ONLY0-NEXT: [[TMP4569:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4569]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5988]] +// SIMD-ONLY0: if.end5988: +// SIMD-ONLY0-NEXT: [[TMP4570:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4571:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5989:%.*]] = fcmp oeq float [[TMP4570]], [[TMP4571]] +// SIMD-ONLY0-NEXT: [[CONV5990:%.*]] = zext i1 [[CMP5989]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5990]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4572:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5991:%.*]] = icmp ne i32 [[TMP4572]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5991]], label [[IF_THEN5992:%.*]], label [[IF_END5993:%.*]] +// SIMD-ONLY0: if.then5992: +// 
SIMD-ONLY0-NEXT: [[TMP4573:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4573]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5993]] +// SIMD-ONLY0: if.end5993: +// SIMD-ONLY0-NEXT: [[TMP4574:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4575:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5994:%.*]] = fcmp oeq float [[TMP4574]], [[TMP4575]] +// SIMD-ONLY0-NEXT: [[CONV5995:%.*]] = zext i1 [[CMP5994]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV5995]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4576:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL5996:%.*]] = icmp ne i32 [[TMP4576]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL5996]], label [[IF_THEN5997:%.*]], label [[IF_END5998:%.*]] +// SIMD-ONLY0: if.then5997: +// SIMD-ONLY0-NEXT: [[TMP4577:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4577]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END5998]] +// SIMD-ONLY0: if.end5998: +// SIMD-ONLY0-NEXT: [[TMP4578:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4579:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP5999:%.*]] = fcmp oeq float [[TMP4578]], [[TMP4579]] +// SIMD-ONLY0-NEXT: [[CONV6000:%.*]] = zext i1 [[CMP5999]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6000]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4580:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6001:%.*]] = icmp ne i32 [[TMP4580]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6001]], label [[IF_THEN6002:%.*]], label [[IF_ELSE6003:%.*]] +// SIMD-ONLY0: if.then6002: +// SIMD-ONLY0-NEXT: [[TMP4581:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4581]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6004:%.*]] +// SIMD-ONLY0: if.else6003: +// SIMD-ONLY0-NEXT: [[TMP4582:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4582]], ptr [[FV]], align 4 
+// SIMD-ONLY0-NEXT: br label [[IF_END6004]] +// SIMD-ONLY0: if.end6004: +// SIMD-ONLY0-NEXT: [[TMP4583:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4584:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6005:%.*]] = fcmp oeq float [[TMP4583]], [[TMP4584]] +// SIMD-ONLY0-NEXT: [[CONV6006:%.*]] = zext i1 [[CMP6005]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6006]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4585:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6007:%.*]] = icmp ne i32 [[TMP4585]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6007]], label [[IF_THEN6008:%.*]], label [[IF_ELSE6009:%.*]] +// SIMD-ONLY0: if.then6008: +// SIMD-ONLY0-NEXT: [[TMP4586:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4586]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6010:%.*]] +// SIMD-ONLY0: if.else6009: +// SIMD-ONLY0-NEXT: [[TMP4587:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4587]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6010]] +// SIMD-ONLY0: if.end6010: +// SIMD-ONLY0-NEXT: [[TMP4588:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4588]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4589:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4590:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6011:%.*]] = fcmp ogt float [[TMP4589]], [[TMP4590]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6011]], label [[IF_THEN6013:%.*]], label [[IF_END6014:%.*]] +// SIMD-ONLY0: if.then6013: +// SIMD-ONLY0-NEXT: [[TMP4591:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4591]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6014]] +// SIMD-ONLY0: if.end6014: +// SIMD-ONLY0-NEXT: [[TMP4592:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4592]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4593:%.*]] = load float, ptr 
[[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4594:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6015:%.*]] = fcmp ogt float [[TMP4593]], [[TMP4594]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6015]], label [[IF_THEN6017:%.*]], label [[IF_END6018:%.*]] +// SIMD-ONLY0: if.then6017: +// SIMD-ONLY0-NEXT: [[TMP4595:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4595]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6018]] +// SIMD-ONLY0: if.end6018: +// SIMD-ONLY0-NEXT: [[TMP4596:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4596]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4597:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4598:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6019:%.*]] = fcmp olt float [[TMP4597]], [[TMP4598]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6019]], label [[IF_THEN6021:%.*]], label [[IF_END6022:%.*]] +// SIMD-ONLY0: if.then6021: +// SIMD-ONLY0-NEXT: [[TMP4599:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4599]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6022]] +// SIMD-ONLY0: if.end6022: +// SIMD-ONLY0-NEXT: [[TMP4600:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4600]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4601:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4602:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6023:%.*]] = fcmp olt float [[TMP4601]], [[TMP4602]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6023]], label [[IF_THEN6025:%.*]], label [[IF_END6026:%.*]] +// SIMD-ONLY0: if.then6025: +// SIMD-ONLY0-NEXT: [[TMP4603:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4603]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6026]] +// SIMD-ONLY0: if.end6026: +// SIMD-ONLY0-NEXT: [[TMP4604:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4604]], ptr 
[[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4605:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4606:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6027:%.*]] = fcmp oeq float [[TMP4605]], [[TMP4606]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6027]], label [[IF_THEN6029:%.*]], label [[IF_END6030:%.*]] +// SIMD-ONLY0: if.then6029: +// SIMD-ONLY0-NEXT: [[TMP4607:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4607]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6030]] +// SIMD-ONLY0: if.end6030: +// SIMD-ONLY0-NEXT: [[TMP4608:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4608]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4609:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4610:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6031:%.*]] = fcmp oeq float [[TMP4609]], [[TMP4610]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6031]], label [[IF_THEN6033:%.*]], label [[IF_END6034:%.*]] +// SIMD-ONLY0: if.then6033: +// SIMD-ONLY0-NEXT: [[TMP4611:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4611]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6034]] +// SIMD-ONLY0: if.end6034: +// SIMD-ONLY0-NEXT: [[TMP4612:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4613:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6035:%.*]] = fcmp ogt float [[TMP4612]], [[TMP4613]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6035]], label [[IF_THEN6037:%.*]], label [[IF_END6038:%.*]] +// SIMD-ONLY0: if.then6037: +// SIMD-ONLY0-NEXT: [[TMP4614:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4614]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6038]] +// SIMD-ONLY0: if.end6038: +// SIMD-ONLY0-NEXT: [[TMP4615:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4615]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4616:%.*]] = load float, 
ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4617:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6039:%.*]] = fcmp ogt float [[TMP4616]], [[TMP4617]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6039]], label [[IF_THEN6041:%.*]], label [[IF_END6042:%.*]] +// SIMD-ONLY0: if.then6041: +// SIMD-ONLY0-NEXT: [[TMP4618:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4618]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6042]] +// SIMD-ONLY0: if.end6042: +// SIMD-ONLY0-NEXT: [[TMP4619:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4619]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4620:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4621:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6043:%.*]] = fcmp olt float [[TMP4620]], [[TMP4621]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6043]], label [[IF_THEN6045:%.*]], label [[IF_END6046:%.*]] +// SIMD-ONLY0: if.then6045: +// SIMD-ONLY0-NEXT: [[TMP4622:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4622]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6046]] +// SIMD-ONLY0: if.end6046: +// SIMD-ONLY0-NEXT: [[TMP4623:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4623]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4624:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4625:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6047:%.*]] = fcmp olt float [[TMP4624]], [[TMP4625]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6047]], label [[IF_THEN6049:%.*]], label [[IF_END6050:%.*]] +// SIMD-ONLY0: if.then6049: +// SIMD-ONLY0-NEXT: [[TMP4626:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4626]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6050]] +// SIMD-ONLY0: if.end6050: +// SIMD-ONLY0-NEXT: [[TMP4627:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4627]], ptr 
[[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4628:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4629:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6051:%.*]] = fcmp oeq float [[TMP4628]], [[TMP4629]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6051]], label [[IF_THEN6053:%.*]], label [[IF_END6054:%.*]] +// SIMD-ONLY0: if.then6053: +// SIMD-ONLY0-NEXT: [[TMP4630:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4630]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6054]] +// SIMD-ONLY0: if.end6054: +// SIMD-ONLY0-NEXT: [[TMP4631:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4631]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4632:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4633:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6055:%.*]] = fcmp oeq float [[TMP4632]], [[TMP4633]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6055]], label [[IF_THEN6057:%.*]], label [[IF_END6058:%.*]] +// SIMD-ONLY0: if.then6057: +// SIMD-ONLY0-NEXT: [[TMP4634:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4634]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6058]] +// SIMD-ONLY0: if.end6058: +// SIMD-ONLY0-NEXT: [[TMP4635:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4635]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4636:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4637:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6059:%.*]] = fcmp oeq float [[TMP4636]], [[TMP4637]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6059]], label [[IF_THEN6061:%.*]], label [[IF_ELSE6062:%.*]] +// SIMD-ONLY0: if.then6061: +// SIMD-ONLY0-NEXT: [[TMP4638:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4638]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6063:%.*]] +// SIMD-ONLY0: if.else6062: +// SIMD-ONLY0-NEXT: [[TMP4639:%.*]] = load 
float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4639]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6063]] +// SIMD-ONLY0: if.end6063: +// SIMD-ONLY0-NEXT: [[TMP4640:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4641:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6064:%.*]] = fcmp oeq float [[TMP4640]], [[TMP4641]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6064]], label [[IF_THEN6066:%.*]], label [[IF_ELSE6067:%.*]] +// SIMD-ONLY0: if.then6066: +// SIMD-ONLY0-NEXT: [[TMP4642:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4642]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6068:%.*]] +// SIMD-ONLY0: if.else6067: +// SIMD-ONLY0-NEXT: [[TMP4643:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4643]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6068]] +// SIMD-ONLY0: if.end6068: +// SIMD-ONLY0-NEXT: [[TMP4644:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4645:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6069:%.*]] = fcmp oeq float [[TMP4644]], [[TMP4645]] +// SIMD-ONLY0-NEXT: [[CONV6070:%.*]] = zext i1 [[CMP6069]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6070]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4646:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6071:%.*]] = icmp ne i32 [[TMP4646]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6071]], label [[IF_THEN6072:%.*]], label [[IF_END6073:%.*]] +// SIMD-ONLY0: if.then6072: +// SIMD-ONLY0-NEXT: [[TMP4647:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4647]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6073]] +// SIMD-ONLY0: if.end6073: +// SIMD-ONLY0-NEXT: [[TMP4648:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4649:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6074:%.*]] = fcmp oeq float [[TMP4648]], [[TMP4649]] +// 
SIMD-ONLY0-NEXT: [[CONV6075:%.*]] = zext i1 [[CMP6074]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6075]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4650:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6076:%.*]] = icmp ne i32 [[TMP4650]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6076]], label [[IF_THEN6077:%.*]], label [[IF_END6078:%.*]] +// SIMD-ONLY0: if.then6077: +// SIMD-ONLY0-NEXT: [[TMP4651:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4651]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6078]] +// SIMD-ONLY0: if.end6078: +// SIMD-ONLY0-NEXT: [[TMP4652:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4653:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6079:%.*]] = fcmp oeq float [[TMP4652]], [[TMP4653]] +// SIMD-ONLY0-NEXT: [[CONV6080:%.*]] = zext i1 [[CMP6079]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6080]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4654:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6081:%.*]] = icmp ne i32 [[TMP4654]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6081]], label [[IF_THEN6082:%.*]], label [[IF_ELSE6083:%.*]] +// SIMD-ONLY0: if.then6082: +// SIMD-ONLY0-NEXT: [[TMP4655:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4655]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6084:%.*]] +// SIMD-ONLY0: if.else6083: +// SIMD-ONLY0-NEXT: [[TMP4656:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4656]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6084]] +// SIMD-ONLY0: if.end6084: +// SIMD-ONLY0-NEXT: [[TMP4657:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4658:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6085:%.*]] = fcmp oeq float [[TMP4657]], [[TMP4658]] +// SIMD-ONLY0-NEXT: [[CONV6086:%.*]] = zext i1 [[CMP6085]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6086]], ptr [[IR]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP4659:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6087:%.*]] = icmp ne i32 [[TMP4659]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6087]], label [[IF_THEN6088:%.*]], label [[IF_ELSE6089:%.*]] +// SIMD-ONLY0: if.then6088: +// SIMD-ONLY0-NEXT: [[TMP4660:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4660]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6090:%.*]] +// SIMD-ONLY0: if.else6089: +// SIMD-ONLY0-NEXT: [[TMP4661:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4661]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6090]] +// SIMD-ONLY0: if.end6090: +// SIMD-ONLY0-NEXT: [[TMP4662:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4662]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4663:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4664:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6091:%.*]] = fcmp ogt float [[TMP4663]], [[TMP4664]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6091]], label [[IF_THEN6093:%.*]], label [[IF_END6094:%.*]] +// SIMD-ONLY0: if.then6093: +// SIMD-ONLY0-NEXT: [[TMP4665:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4665]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6094]] +// SIMD-ONLY0: if.end6094: +// SIMD-ONLY0-NEXT: [[TMP4666:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4666]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4667:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4668:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6095:%.*]] = fcmp ogt float [[TMP4667]], [[TMP4668]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6095]], label [[IF_THEN6097:%.*]], label [[IF_END6098:%.*]] +// SIMD-ONLY0: if.then6097: +// SIMD-ONLY0-NEXT: [[TMP4669:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4669]], ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END6098]] +// SIMD-ONLY0: if.end6098: +// SIMD-ONLY0-NEXT: [[TMP4670:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4670]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4671:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4672:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6099:%.*]] = fcmp olt float [[TMP4671]], [[TMP4672]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6099]], label [[IF_THEN6101:%.*]], label [[IF_END6102:%.*]] +// SIMD-ONLY0: if.then6101: +// SIMD-ONLY0-NEXT: [[TMP4673:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4673]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6102]] +// SIMD-ONLY0: if.end6102: +// SIMD-ONLY0-NEXT: [[TMP4674:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4674]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4675:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4676:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6103:%.*]] = fcmp olt float [[TMP4675]], [[TMP4676]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6103]], label [[IF_THEN6105:%.*]], label [[IF_END6106:%.*]] +// SIMD-ONLY0: if.then6105: +// SIMD-ONLY0-NEXT: [[TMP4677:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4677]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6106]] +// SIMD-ONLY0: if.end6106: +// SIMD-ONLY0-NEXT: [[TMP4678:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4678]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4679:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4680:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6107:%.*]] = fcmp oeq float [[TMP4679]], [[TMP4680]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6107]], label [[IF_THEN6109:%.*]], label [[IF_END6110:%.*]] +// SIMD-ONLY0: if.then6109: +// SIMD-ONLY0-NEXT: [[TMP4681:%.*]] = load float, ptr [[FD]], align 4 
+// SIMD-ONLY0-NEXT: store float [[TMP4681]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6110]] +// SIMD-ONLY0: if.end6110: +// SIMD-ONLY0-NEXT: [[TMP4682:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4682]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4683:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4684:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6111:%.*]] = fcmp oeq float [[TMP4683]], [[TMP4684]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6111]], label [[IF_THEN6113:%.*]], label [[IF_END6114:%.*]] +// SIMD-ONLY0: if.then6113: +// SIMD-ONLY0-NEXT: [[TMP4685:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4685]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6114]] +// SIMD-ONLY0: if.end6114: +// SIMD-ONLY0-NEXT: [[TMP4686:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4687:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6115:%.*]] = fcmp ogt float [[TMP4686]], [[TMP4687]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6115]], label [[IF_THEN6117:%.*]], label [[IF_END6118:%.*]] +// SIMD-ONLY0: if.then6117: +// SIMD-ONLY0-NEXT: [[TMP4688:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4688]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6118]] +// SIMD-ONLY0: if.end6118: +// SIMD-ONLY0-NEXT: [[TMP4689:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4689]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4690:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4691:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6119:%.*]] = fcmp ogt float [[TMP4690]], [[TMP4691]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6119]], label [[IF_THEN6121:%.*]], label [[IF_END6122:%.*]] +// SIMD-ONLY0: if.then6121: +// SIMD-ONLY0-NEXT: [[TMP4692:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4692]], ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[IF_END6122]] +// SIMD-ONLY0: if.end6122: +// SIMD-ONLY0-NEXT: [[TMP4693:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4693]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4694:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4695:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6123:%.*]] = fcmp olt float [[TMP4694]], [[TMP4695]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6123]], label [[IF_THEN6125:%.*]], label [[IF_END6126:%.*]] +// SIMD-ONLY0: if.then6125: +// SIMD-ONLY0-NEXT: [[TMP4696:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4696]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6126]] +// SIMD-ONLY0: if.end6126: +// SIMD-ONLY0-NEXT: [[TMP4697:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4697]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4698:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4699:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6127:%.*]] = fcmp olt float [[TMP4698]], [[TMP4699]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6127]], label [[IF_THEN6129:%.*]], label [[IF_END6130:%.*]] +// SIMD-ONLY0: if.then6129: +// SIMD-ONLY0-NEXT: [[TMP4700:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4700]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6130]] +// SIMD-ONLY0: if.end6130: +// SIMD-ONLY0-NEXT: [[TMP4701:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4701]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4702:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4703:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6131:%.*]] = fcmp oeq float [[TMP4702]], [[TMP4703]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6131]], label [[IF_THEN6133:%.*]], label [[IF_END6134:%.*]] +// SIMD-ONLY0: if.then6133: +// SIMD-ONLY0-NEXT: [[TMP4704:%.*]] = load float, ptr [[FD]], align 4 
+// SIMD-ONLY0-NEXT: store float [[TMP4704]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6134]] +// SIMD-ONLY0: if.end6134: +// SIMD-ONLY0-NEXT: [[TMP4705:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4705]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4706:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4707:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6135:%.*]] = fcmp oeq float [[TMP4706]], [[TMP4707]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6135]], label [[IF_THEN6137:%.*]], label [[IF_END6138:%.*]] +// SIMD-ONLY0: if.then6137: +// SIMD-ONLY0-NEXT: [[TMP4708:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4708]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6138]] +// SIMD-ONLY0: if.end6138: +// SIMD-ONLY0-NEXT: [[TMP4709:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4709]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4710:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4711:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6139:%.*]] = fcmp oeq float [[TMP4710]], [[TMP4711]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6139]], label [[IF_THEN6141:%.*]], label [[IF_ELSE6142:%.*]] +// SIMD-ONLY0: if.then6141: +// SIMD-ONLY0-NEXT: [[TMP4712:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4712]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6143:%.*]] +// SIMD-ONLY0: if.else6142: +// SIMD-ONLY0-NEXT: [[TMP4713:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4713]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6143]] +// SIMD-ONLY0: if.end6143: +// SIMD-ONLY0-NEXT: [[TMP4714:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4715:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6144:%.*]] = fcmp oeq float [[TMP4714]], [[TMP4715]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6144]], label 
[[IF_THEN6146:%.*]], label [[IF_ELSE6147:%.*]] +// SIMD-ONLY0: if.then6146: +// SIMD-ONLY0-NEXT: [[TMP4716:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4716]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6148:%.*]] +// SIMD-ONLY0: if.else6147: +// SIMD-ONLY0-NEXT: [[TMP4717:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4717]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6148]] +// SIMD-ONLY0: if.end6148: +// SIMD-ONLY0-NEXT: [[TMP4718:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4719:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6149:%.*]] = fcmp oeq float [[TMP4718]], [[TMP4719]] +// SIMD-ONLY0-NEXT: [[CONV6150:%.*]] = zext i1 [[CMP6149]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6150]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4720:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6151:%.*]] = icmp ne i32 [[TMP4720]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6151]], label [[IF_THEN6152:%.*]], label [[IF_END6153:%.*]] +// SIMD-ONLY0: if.then6152: +// SIMD-ONLY0-NEXT: [[TMP4721:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4721]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6153]] +// SIMD-ONLY0: if.end6153: +// SIMD-ONLY0-NEXT: [[TMP4722:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4723:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6154:%.*]] = fcmp oeq float [[TMP4722]], [[TMP4723]] +// SIMD-ONLY0-NEXT: [[CONV6155:%.*]] = zext i1 [[CMP6154]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6155]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4724:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6156:%.*]] = icmp ne i32 [[TMP4724]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6156]], label [[IF_THEN6157:%.*]], label [[IF_END6158:%.*]] +// SIMD-ONLY0: if.then6157: +// SIMD-ONLY0-NEXT: [[TMP4725:%.*]] = load float, ptr 
[[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4725]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6158]] +// SIMD-ONLY0: if.end6158: +// SIMD-ONLY0-NEXT: [[TMP4726:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4727:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6159:%.*]] = fcmp oeq float [[TMP4726]], [[TMP4727]] +// SIMD-ONLY0-NEXT: [[CONV6160:%.*]] = zext i1 [[CMP6159]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6160]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4728:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6161:%.*]] = icmp ne i32 [[TMP4728]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6161]], label [[IF_THEN6162:%.*]], label [[IF_ELSE6163:%.*]] +// SIMD-ONLY0: if.then6162: +// SIMD-ONLY0-NEXT: [[TMP4729:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4729]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6164:%.*]] +// SIMD-ONLY0: if.else6163: +// SIMD-ONLY0-NEXT: [[TMP4730:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4730]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6164]] +// SIMD-ONLY0: if.end6164: +// SIMD-ONLY0-NEXT: [[TMP4731:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4732:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6165:%.*]] = fcmp oeq float [[TMP4731]], [[TMP4732]] +// SIMD-ONLY0-NEXT: [[CONV6166:%.*]] = zext i1 [[CMP6165]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6166]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4733:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6167:%.*]] = icmp ne i32 [[TMP4733]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6167]], label [[IF_THEN6168:%.*]], label [[IF_ELSE6169:%.*]] +// SIMD-ONLY0: if.then6168: +// SIMD-ONLY0-NEXT: [[TMP4734:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4734]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6170:%.*]] 
+// SIMD-ONLY0: if.else6169: +// SIMD-ONLY0-NEXT: [[TMP4735:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4735]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6170]] +// SIMD-ONLY0: if.end6170: +// SIMD-ONLY0-NEXT: [[TMP4736:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4736]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4737:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4738:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6171:%.*]] = fcmp ogt float [[TMP4737]], [[TMP4738]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6171]], label [[IF_THEN6173:%.*]], label [[IF_END6174:%.*]] +// SIMD-ONLY0: if.then6173: +// SIMD-ONLY0-NEXT: [[TMP4739:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4739]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6174]] +// SIMD-ONLY0: if.end6174: +// SIMD-ONLY0-NEXT: [[TMP4740:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4740]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4741:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4742:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6175:%.*]] = fcmp ogt float [[TMP4741]], [[TMP4742]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6175]], label [[IF_THEN6177:%.*]], label [[IF_END6178:%.*]] +// SIMD-ONLY0: if.then6177: +// SIMD-ONLY0-NEXT: [[TMP4743:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4743]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6178]] +// SIMD-ONLY0: if.end6178: +// SIMD-ONLY0-NEXT: [[TMP4744:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4744]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4745:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4746:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6179:%.*]] = fcmp olt float [[TMP4745]], [[TMP4746]] +// SIMD-ONLY0-NEXT: br 
i1 [[CMP6179]], label [[IF_THEN6181:%.*]], label [[IF_END6182:%.*]] +// SIMD-ONLY0: if.then6181: +// SIMD-ONLY0-NEXT: [[TMP4747:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4747]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6182]] +// SIMD-ONLY0: if.end6182: +// SIMD-ONLY0-NEXT: [[TMP4748:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4748]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4749:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4750:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6183:%.*]] = fcmp olt float [[TMP4749]], [[TMP4750]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6183]], label [[IF_THEN6185:%.*]], label [[IF_END6186:%.*]] +// SIMD-ONLY0: if.then6185: +// SIMD-ONLY0-NEXT: [[TMP4751:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4751]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6186]] +// SIMD-ONLY0: if.end6186: +// SIMD-ONLY0-NEXT: [[TMP4752:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4752]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4753:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4754:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6187:%.*]] = fcmp oeq float [[TMP4753]], [[TMP4754]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6187]], label [[IF_THEN6189:%.*]], label [[IF_END6190:%.*]] +// SIMD-ONLY0: if.then6189: +// SIMD-ONLY0-NEXT: [[TMP4755:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4755]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6190]] +// SIMD-ONLY0: if.end6190: +// SIMD-ONLY0-NEXT: [[TMP4756:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4756]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4757:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4758:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP6191:%.*]] = fcmp oeq float [[TMP4757]], [[TMP4758]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6191]], label [[IF_THEN6193:%.*]], label [[IF_END6194:%.*]] +// SIMD-ONLY0: if.then6193: +// SIMD-ONLY0-NEXT: [[TMP4759:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4759]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6194]] +// SIMD-ONLY0: if.end6194: +// SIMD-ONLY0-NEXT: [[TMP4760:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4761:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6195:%.*]] = fcmp ogt float [[TMP4760]], [[TMP4761]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6195]], label [[IF_THEN6197:%.*]], label [[IF_END6198:%.*]] +// SIMD-ONLY0: if.then6197: +// SIMD-ONLY0-NEXT: [[TMP4762:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4762]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6198]] +// SIMD-ONLY0: if.end6198: +// SIMD-ONLY0-NEXT: [[TMP4763:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4763]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4764:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4765:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6199:%.*]] = fcmp ogt float [[TMP4764]], [[TMP4765]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6199]], label [[IF_THEN6201:%.*]], label [[IF_END6202:%.*]] +// SIMD-ONLY0: if.then6201: +// SIMD-ONLY0-NEXT: [[TMP4766:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4766]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6202]] +// SIMD-ONLY0: if.end6202: +// SIMD-ONLY0-NEXT: [[TMP4767:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4767]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4768:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4769:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6203:%.*]] = fcmp olt float [[TMP4768]], [[TMP4769]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP6203]], label [[IF_THEN6205:%.*]], label [[IF_END6206:%.*]] +// SIMD-ONLY0: if.then6205: +// SIMD-ONLY0-NEXT: [[TMP4770:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4770]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6206]] +// SIMD-ONLY0: if.end6206: +// SIMD-ONLY0-NEXT: [[TMP4771:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4771]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4772:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4773:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6207:%.*]] = fcmp olt float [[TMP4772]], [[TMP4773]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6207]], label [[IF_THEN6209:%.*]], label [[IF_END6210:%.*]] +// SIMD-ONLY0: if.then6209: +// SIMD-ONLY0-NEXT: [[TMP4774:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4774]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6210]] +// SIMD-ONLY0: if.end6210: +// SIMD-ONLY0-NEXT: [[TMP4775:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4775]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4776:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4777:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6211:%.*]] = fcmp oeq float [[TMP4776]], [[TMP4777]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6211]], label [[IF_THEN6213:%.*]], label [[IF_END6214:%.*]] +// SIMD-ONLY0: if.then6213: +// SIMD-ONLY0-NEXT: [[TMP4778:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4778]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6214]] +// SIMD-ONLY0: if.end6214: +// SIMD-ONLY0-NEXT: [[TMP4779:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4779]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4780:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4781:%.*]] = load float, ptr [[FX]], align 4 +// 
SIMD-ONLY0-NEXT: [[CMP6215:%.*]] = fcmp oeq float [[TMP4780]], [[TMP4781]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6215]], label [[IF_THEN6217:%.*]], label [[IF_END6218:%.*]] +// SIMD-ONLY0: if.then6217: +// SIMD-ONLY0-NEXT: [[TMP4782:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4782]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6218]] +// SIMD-ONLY0: if.end6218: +// SIMD-ONLY0-NEXT: [[TMP4783:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4783]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4784:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4785:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6219:%.*]] = fcmp oeq float [[TMP4784]], [[TMP4785]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6219]], label [[IF_THEN6221:%.*]], label [[IF_ELSE6222:%.*]] +// SIMD-ONLY0: if.then6221: +// SIMD-ONLY0-NEXT: [[TMP4786:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4786]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6223:%.*]] +// SIMD-ONLY0: if.else6222: +// SIMD-ONLY0-NEXT: [[TMP4787:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4787]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6223]] +// SIMD-ONLY0: if.end6223: +// SIMD-ONLY0-NEXT: [[TMP4788:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4789:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6224:%.*]] = fcmp oeq float [[TMP4788]], [[TMP4789]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6224]], label [[IF_THEN6226:%.*]], label [[IF_ELSE6227:%.*]] +// SIMD-ONLY0: if.then6226: +// SIMD-ONLY0-NEXT: [[TMP4790:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4790]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6228:%.*]] +// SIMD-ONLY0: if.else6227: +// SIMD-ONLY0-NEXT: [[TMP4791:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4791]], ptr [[FV]], 
align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6228]] +// SIMD-ONLY0: if.end6228: +// SIMD-ONLY0-NEXT: [[TMP4792:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4793:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6229:%.*]] = fcmp oeq float [[TMP4792]], [[TMP4793]] +// SIMD-ONLY0-NEXT: [[CONV6230:%.*]] = zext i1 [[CMP6229]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6230]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4794:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6231:%.*]] = icmp ne i32 [[TMP4794]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6231]], label [[IF_THEN6232:%.*]], label [[IF_END6233:%.*]] +// SIMD-ONLY0: if.then6232: +// SIMD-ONLY0-NEXT: [[TMP4795:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4795]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6233]] +// SIMD-ONLY0: if.end6233: +// SIMD-ONLY0-NEXT: [[TMP4796:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4797:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6234:%.*]] = fcmp oeq float [[TMP4796]], [[TMP4797]] +// SIMD-ONLY0-NEXT: [[CONV6235:%.*]] = zext i1 [[CMP6234]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6235]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4798:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6236:%.*]] = icmp ne i32 [[TMP4798]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6236]], label [[IF_THEN6237:%.*]], label [[IF_END6238:%.*]] +// SIMD-ONLY0: if.then6237: +// SIMD-ONLY0-NEXT: [[TMP4799:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4799]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6238]] +// SIMD-ONLY0: if.end6238: +// SIMD-ONLY0-NEXT: [[TMP4800:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4801:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6239:%.*]] = fcmp oeq float [[TMP4800]], [[TMP4801]] +// SIMD-ONLY0-NEXT: [[CONV6240:%.*]] = zext i1 
[[CMP6239]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6240]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4802:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6241:%.*]] = icmp ne i32 [[TMP4802]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6241]], label [[IF_THEN6242:%.*]], label [[IF_ELSE6243:%.*]] +// SIMD-ONLY0: if.then6242: +// SIMD-ONLY0-NEXT: [[TMP4803:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4803]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6244:%.*]] +// SIMD-ONLY0: if.else6243: +// SIMD-ONLY0-NEXT: [[TMP4804:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4804]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6244]] +// SIMD-ONLY0: if.end6244: +// SIMD-ONLY0-NEXT: [[TMP4805:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4806:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6245:%.*]] = fcmp oeq float [[TMP4805]], [[TMP4806]] +// SIMD-ONLY0-NEXT: [[CONV6246:%.*]] = zext i1 [[CMP6245]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6246]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4807:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6247:%.*]] = icmp ne i32 [[TMP4807]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6247]], label [[IF_THEN6248:%.*]], label [[IF_ELSE6249:%.*]] +// SIMD-ONLY0: if.then6248: +// SIMD-ONLY0-NEXT: [[TMP4808:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4808]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6250:%.*]] +// SIMD-ONLY0: if.else6249: +// SIMD-ONLY0-NEXT: [[TMP4809:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4809]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6250]] +// SIMD-ONLY0: if.end6250: +// SIMD-ONLY0-NEXT: [[TMP4810:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4810]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4811:%.*]] = load 
float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4812:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6251:%.*]] = fcmp ogt float [[TMP4811]], [[TMP4812]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6251]], label [[IF_THEN6253:%.*]], label [[IF_END6254:%.*]] +// SIMD-ONLY0: if.then6253: +// SIMD-ONLY0-NEXT: [[TMP4813:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4813]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6254]] +// SIMD-ONLY0: if.end6254: +// SIMD-ONLY0-NEXT: [[TMP4814:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4814]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4815:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4816:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6255:%.*]] = fcmp ogt float [[TMP4815]], [[TMP4816]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6255]], label [[IF_THEN6257:%.*]], label [[IF_END6258:%.*]] +// SIMD-ONLY0: if.then6257: +// SIMD-ONLY0-NEXT: [[TMP4817:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4817]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6258]] +// SIMD-ONLY0: if.end6258: +// SIMD-ONLY0-NEXT: [[TMP4818:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4818]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4819:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4820:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6259:%.*]] = fcmp olt float [[TMP4819]], [[TMP4820]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6259]], label [[IF_THEN6261:%.*]], label [[IF_END6262:%.*]] +// SIMD-ONLY0: if.then6261: +// SIMD-ONLY0-NEXT: [[TMP4821:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4821]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6262]] +// SIMD-ONLY0: if.end6262: +// SIMD-ONLY0-NEXT: [[TMP4822:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float 
[[TMP4822]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4823:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4824:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6263:%.*]] = fcmp olt float [[TMP4823]], [[TMP4824]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6263]], label [[IF_THEN6265:%.*]], label [[IF_END6266:%.*]] +// SIMD-ONLY0: if.then6265: +// SIMD-ONLY0-NEXT: [[TMP4825:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4825]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6266]] +// SIMD-ONLY0: if.end6266: +// SIMD-ONLY0-NEXT: [[TMP4826:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4826]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4827:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4828:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6267:%.*]] = fcmp oeq float [[TMP4827]], [[TMP4828]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6267]], label [[IF_THEN6269:%.*]], label [[IF_END6270:%.*]] +// SIMD-ONLY0: if.then6269: +// SIMD-ONLY0-NEXT: [[TMP4829:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4829]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6270]] +// SIMD-ONLY0: if.end6270: +// SIMD-ONLY0-NEXT: [[TMP4830:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4830]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4831:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4832:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6271:%.*]] = fcmp oeq float [[TMP4831]], [[TMP4832]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6271]], label [[IF_THEN6273:%.*]], label [[IF_END6274:%.*]] +// SIMD-ONLY0: if.then6273: +// SIMD-ONLY0-NEXT: [[TMP4833:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4833]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6274]] +// SIMD-ONLY0: if.end6274: +// SIMD-ONLY0-NEXT: [[TMP4834:%.*]] 
= load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4835:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6275:%.*]] = fcmp ogt float [[TMP4834]], [[TMP4835]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6275]], label [[IF_THEN6277:%.*]], label [[IF_END6278:%.*]] +// SIMD-ONLY0: if.then6277: +// SIMD-ONLY0-NEXT: [[TMP4836:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4836]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6278]] +// SIMD-ONLY0: if.end6278: +// SIMD-ONLY0-NEXT: [[TMP4837:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4837]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4838:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4839:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6279:%.*]] = fcmp ogt float [[TMP4838]], [[TMP4839]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6279]], label [[IF_THEN6281:%.*]], label [[IF_END6282:%.*]] +// SIMD-ONLY0: if.then6281: +// SIMD-ONLY0-NEXT: [[TMP4840:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4840]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6282]] +// SIMD-ONLY0: if.end6282: +// SIMD-ONLY0-NEXT: [[TMP4841:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4841]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4842:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4843:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6283:%.*]] = fcmp olt float [[TMP4842]], [[TMP4843]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6283]], label [[IF_THEN6285:%.*]], label [[IF_END6286:%.*]] +// SIMD-ONLY0: if.then6285: +// SIMD-ONLY0-NEXT: [[TMP4844:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4844]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6286]] +// SIMD-ONLY0: if.end6286: +// SIMD-ONLY0-NEXT: [[TMP4845:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float 
[[TMP4845]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4846:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4847:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6287:%.*]] = fcmp olt float [[TMP4846]], [[TMP4847]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6287]], label [[IF_THEN6289:%.*]], label [[IF_END6290:%.*]] +// SIMD-ONLY0: if.then6289: +// SIMD-ONLY0-NEXT: [[TMP4848:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4848]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6290]] +// SIMD-ONLY0: if.end6290: +// SIMD-ONLY0-NEXT: [[TMP4849:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4849]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4850:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4851:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6291:%.*]] = fcmp oeq float [[TMP4850]], [[TMP4851]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6291]], label [[IF_THEN6293:%.*]], label [[IF_END6294:%.*]] +// SIMD-ONLY0: if.then6293: +// SIMD-ONLY0-NEXT: [[TMP4852:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4852]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6294]] +// SIMD-ONLY0: if.end6294: +// SIMD-ONLY0-NEXT: [[TMP4853:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4853]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4854:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4855:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6295:%.*]] = fcmp oeq float [[TMP4854]], [[TMP4855]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6295]], label [[IF_THEN6297:%.*]], label [[IF_END6298:%.*]] +// SIMD-ONLY0: if.then6297: +// SIMD-ONLY0-NEXT: [[TMP4856:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4856]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6298]] +// SIMD-ONLY0: if.end6298: +// SIMD-ONLY0-NEXT: [[TMP4857:%.*]] 
= load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4857]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4858:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4859:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6299:%.*]] = fcmp oeq float [[TMP4858]], [[TMP4859]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6299]], label [[IF_THEN6301:%.*]], label [[IF_ELSE6302:%.*]] +// SIMD-ONLY0: if.then6301: +// SIMD-ONLY0-NEXT: [[TMP4860:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4860]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6303:%.*]] +// SIMD-ONLY0: if.else6302: +// SIMD-ONLY0-NEXT: [[TMP4861:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4861]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6303]] +// SIMD-ONLY0: if.end6303: +// SIMD-ONLY0-NEXT: [[TMP4862:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4863:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6304:%.*]] = fcmp oeq float [[TMP4862]], [[TMP4863]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6304]], label [[IF_THEN6306:%.*]], label [[IF_ELSE6307:%.*]] +// SIMD-ONLY0: if.then6306: +// SIMD-ONLY0-NEXT: [[TMP4864:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4864]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6308:%.*]] +// SIMD-ONLY0: if.else6307: +// SIMD-ONLY0-NEXT: [[TMP4865:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4865]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6308]] +// SIMD-ONLY0: if.end6308: +// SIMD-ONLY0-NEXT: [[TMP4866:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4867:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6309:%.*]] = fcmp oeq float [[TMP4866]], [[TMP4867]] +// SIMD-ONLY0-NEXT: [[CONV6310:%.*]] = zext i1 [[CMP6309]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6310]], ptr [[IR]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP4868:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6311:%.*]] = icmp ne i32 [[TMP4868]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6311]], label [[IF_THEN6312:%.*]], label [[IF_END6313:%.*]] +// SIMD-ONLY0: if.then6312: +// SIMD-ONLY0-NEXT: [[TMP4869:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4869]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6313]] +// SIMD-ONLY0: if.end6313: +// SIMD-ONLY0-NEXT: [[TMP4870:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4871:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6314:%.*]] = fcmp oeq float [[TMP4870]], [[TMP4871]] +// SIMD-ONLY0-NEXT: [[CONV6315:%.*]] = zext i1 [[CMP6314]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6315]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4872:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6316:%.*]] = icmp ne i32 [[TMP4872]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6316]], label [[IF_THEN6317:%.*]], label [[IF_END6318:%.*]] +// SIMD-ONLY0: if.then6317: +// SIMD-ONLY0-NEXT: [[TMP4873:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4873]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6318]] +// SIMD-ONLY0: if.end6318: +// SIMD-ONLY0-NEXT: [[TMP4874:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4875:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6319:%.*]] = fcmp oeq float [[TMP4874]], [[TMP4875]] +// SIMD-ONLY0-NEXT: [[CONV6320:%.*]] = zext i1 [[CMP6319]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6320]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4876:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6321:%.*]] = icmp ne i32 [[TMP4876]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6321]], label [[IF_THEN6322:%.*]], label [[IF_ELSE6323:%.*]] +// SIMD-ONLY0: if.then6322: +// SIMD-ONLY0-NEXT: [[TMP4877:%.*]] = load float, ptr [[FD]], align 4 +// 
SIMD-ONLY0-NEXT: store float [[TMP4877]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6324:%.*]] +// SIMD-ONLY0: if.else6323: +// SIMD-ONLY0-NEXT: [[TMP4878:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4878]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6324]] +// SIMD-ONLY0: if.end6324: +// SIMD-ONLY0-NEXT: [[TMP4879:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4880:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6325:%.*]] = fcmp oeq float [[TMP4879]], [[TMP4880]] +// SIMD-ONLY0-NEXT: [[CONV6326:%.*]] = zext i1 [[CMP6325]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6326]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4881:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6327:%.*]] = icmp ne i32 [[TMP4881]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6327]], label [[IF_THEN6328:%.*]], label [[IF_ELSE6329:%.*]] +// SIMD-ONLY0: if.then6328: +// SIMD-ONLY0-NEXT: [[TMP4882:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4882]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6330:%.*]] +// SIMD-ONLY0: if.else6329: +// SIMD-ONLY0-NEXT: [[TMP4883:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP4883]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: br label [[IF_END6330]] +// SIMD-ONLY0: if.end6330: +// SIMD-ONLY0-NEXT: [[TMP4884:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4884]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4885:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4886:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6331:%.*]] = fcmp ogt double [[TMP4885]], [[TMP4886]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6331]], label [[IF_THEN6333:%.*]], label [[IF_END6334:%.*]] +// SIMD-ONLY0: if.then6333: +// SIMD-ONLY0-NEXT: [[TMP4887:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4887]], ptr 
[[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6334]] +// SIMD-ONLY0: if.end6334: +// SIMD-ONLY0-NEXT: [[TMP4888:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4888]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4889:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4890:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6335:%.*]] = fcmp ogt double [[TMP4889]], [[TMP4890]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6335]], label [[IF_THEN6337:%.*]], label [[IF_END6338:%.*]] +// SIMD-ONLY0: if.then6337: +// SIMD-ONLY0-NEXT: [[TMP4891:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4891]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6338]] +// SIMD-ONLY0: if.end6338: +// SIMD-ONLY0-NEXT: [[TMP4892:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4892]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4893:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4894:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6339:%.*]] = fcmp olt double [[TMP4893]], [[TMP4894]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6339]], label [[IF_THEN6341:%.*]], label [[IF_END6342:%.*]] +// SIMD-ONLY0: if.then6341: +// SIMD-ONLY0-NEXT: [[TMP4895:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4895]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6342]] +// SIMD-ONLY0: if.end6342: +// SIMD-ONLY0-NEXT: [[TMP4896:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4896]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4897:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4898:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6343:%.*]] = fcmp olt double [[TMP4897]], [[TMP4898]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6343]], label [[IF_THEN6345:%.*]], label [[IF_END6346:%.*]] +// SIMD-ONLY0: if.then6345: +// SIMD-ONLY0-NEXT: 
[[TMP4899:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4899]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6346]] +// SIMD-ONLY0: if.end6346: +// SIMD-ONLY0-NEXT: [[TMP4900:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4900]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4901:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4902:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6347:%.*]] = fcmp oeq double [[TMP4901]], [[TMP4902]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6347]], label [[IF_THEN6349:%.*]], label [[IF_END6350:%.*]] +// SIMD-ONLY0: if.then6349: +// SIMD-ONLY0-NEXT: [[TMP4903:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4903]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6350]] +// SIMD-ONLY0: if.end6350: +// SIMD-ONLY0-NEXT: [[TMP4904:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4904]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4905:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4906:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6351:%.*]] = fcmp oeq double [[TMP4905]], [[TMP4906]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6351]], label [[IF_THEN6353:%.*]], label [[IF_END6354:%.*]] +// SIMD-ONLY0: if.then6353: +// SIMD-ONLY0-NEXT: [[TMP4907:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4907]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6354]] +// SIMD-ONLY0: if.end6354: +// SIMD-ONLY0-NEXT: [[TMP4908:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4909:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6355:%.*]] = fcmp ogt double [[TMP4908]], [[TMP4909]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6355]], label [[IF_THEN6357:%.*]], label [[IF_END6358:%.*]] +// SIMD-ONLY0: if.then6357: +// SIMD-ONLY0-NEXT: [[TMP4910:%.*]] = load double, ptr [[DE]], align 8 
+// SIMD-ONLY0-NEXT: store double [[TMP4910]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6358]] +// SIMD-ONLY0: if.end6358: +// SIMD-ONLY0-NEXT: [[TMP4911:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4911]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4912:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4913:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6359:%.*]] = fcmp ogt double [[TMP4912]], [[TMP4913]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6359]], label [[IF_THEN6361:%.*]], label [[IF_END6362:%.*]] +// SIMD-ONLY0: if.then6361: +// SIMD-ONLY0-NEXT: [[TMP4914:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4914]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6362]] +// SIMD-ONLY0: if.end6362: +// SIMD-ONLY0-NEXT: [[TMP4915:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4915]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4916:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4917:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6363:%.*]] = fcmp olt double [[TMP4916]], [[TMP4917]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6363]], label [[IF_THEN6365:%.*]], label [[IF_END6366:%.*]] +// SIMD-ONLY0: if.then6365: +// SIMD-ONLY0-NEXT: [[TMP4918:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4918]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6366]] +// SIMD-ONLY0: if.end6366: +// SIMD-ONLY0-NEXT: [[TMP4919:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4919]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4920:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4921:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6367:%.*]] = fcmp olt double [[TMP4920]], [[TMP4921]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6367]], label [[IF_THEN6369:%.*]], label [[IF_END6370:%.*]] +// 
SIMD-ONLY0: if.then6369: +// SIMD-ONLY0-NEXT: [[TMP4922:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4922]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6370]] +// SIMD-ONLY0: if.end6370: +// SIMD-ONLY0-NEXT: [[TMP4923:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4923]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4924:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4925:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6371:%.*]] = fcmp oeq double [[TMP4924]], [[TMP4925]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6371]], label [[IF_THEN6373:%.*]], label [[IF_END6374:%.*]] +// SIMD-ONLY0: if.then6373: +// SIMD-ONLY0-NEXT: [[TMP4926:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4926]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6374]] +// SIMD-ONLY0: if.end6374: +// SIMD-ONLY0-NEXT: [[TMP4927:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4927]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4928:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4929:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6375:%.*]] = fcmp oeq double [[TMP4928]], [[TMP4929]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6375]], label [[IF_THEN6377:%.*]], label [[IF_END6378:%.*]] +// SIMD-ONLY0: if.then6377: +// SIMD-ONLY0-NEXT: [[TMP4930:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4930]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6378]] +// SIMD-ONLY0: if.end6378: +// SIMD-ONLY0-NEXT: [[TMP4931:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4931]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4932:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4933:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6379:%.*]] = fcmp oeq double [[TMP4932]], [[TMP4933]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP6379]], label [[IF_THEN6381:%.*]], label [[IF_ELSE6382:%.*]] +// SIMD-ONLY0: if.then6381: +// SIMD-ONLY0-NEXT: [[TMP4934:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4934]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6383:%.*]] +// SIMD-ONLY0: if.else6382: +// SIMD-ONLY0-NEXT: [[TMP4935:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4935]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6383]] +// SIMD-ONLY0: if.end6383: +// SIMD-ONLY0-NEXT: [[TMP4936:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4937:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6384:%.*]] = fcmp oeq double [[TMP4936]], [[TMP4937]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6384]], label [[IF_THEN6386:%.*]], label [[IF_ELSE6387:%.*]] +// SIMD-ONLY0: if.then6386: +// SIMD-ONLY0-NEXT: [[TMP4938:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4938]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6388:%.*]] +// SIMD-ONLY0: if.else6387: +// SIMD-ONLY0-NEXT: [[TMP4939:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4939]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6388]] +// SIMD-ONLY0: if.end6388: +// SIMD-ONLY0-NEXT: [[TMP4940:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4941:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6389:%.*]] = fcmp oeq double [[TMP4940]], [[TMP4941]] +// SIMD-ONLY0-NEXT: [[CONV6390:%.*]] = zext i1 [[CMP6389]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6390]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4942:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6391:%.*]] = icmp ne i32 [[TMP4942]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6391]], label [[IF_THEN6392:%.*]], label [[IF_END6393:%.*]] +// SIMD-ONLY0: if.then6392: +// SIMD-ONLY0-NEXT: [[TMP4943:%.*]] = load double, ptr 
[[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4943]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6393]] +// SIMD-ONLY0: if.end6393: +// SIMD-ONLY0-NEXT: [[TMP4944:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4945:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6394:%.*]] = fcmp oeq double [[TMP4944]], [[TMP4945]] +// SIMD-ONLY0-NEXT: [[CONV6395:%.*]] = zext i1 [[CMP6394]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6395]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4946:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6396:%.*]] = icmp ne i32 [[TMP4946]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6396]], label [[IF_THEN6397:%.*]], label [[IF_END6398:%.*]] +// SIMD-ONLY0: if.then6397: +// SIMD-ONLY0-NEXT: [[TMP4947:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4947]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6398]] +// SIMD-ONLY0: if.end6398: +// SIMD-ONLY0-NEXT: [[TMP4948:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4949:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6399:%.*]] = fcmp oeq double [[TMP4948]], [[TMP4949]] +// SIMD-ONLY0-NEXT: [[CONV6400:%.*]] = zext i1 [[CMP6399]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6400]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4950:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6401:%.*]] = icmp ne i32 [[TMP4950]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6401]], label [[IF_THEN6402:%.*]], label [[IF_ELSE6403:%.*]] +// SIMD-ONLY0: if.then6402: +// SIMD-ONLY0-NEXT: [[TMP4951:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4951]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6404:%.*]] +// SIMD-ONLY0: if.else6403: +// SIMD-ONLY0-NEXT: [[TMP4952:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4952]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[IF_END6404]] +// SIMD-ONLY0: if.end6404: +// SIMD-ONLY0-NEXT: [[TMP4953:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4954:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6405:%.*]] = fcmp oeq double [[TMP4953]], [[TMP4954]] +// SIMD-ONLY0-NEXT: [[CONV6406:%.*]] = zext i1 [[CMP6405]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6406]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP4955:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6407:%.*]] = icmp ne i32 [[TMP4955]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6407]], label [[IF_THEN6408:%.*]], label [[IF_ELSE6409:%.*]] +// SIMD-ONLY0: if.then6408: +// SIMD-ONLY0-NEXT: [[TMP4956:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4956]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6410:%.*]] +// SIMD-ONLY0: if.else6409: +// SIMD-ONLY0-NEXT: [[TMP4957:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4957]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6410]] +// SIMD-ONLY0: if.end6410: +// SIMD-ONLY0-NEXT: [[TMP4958:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4958]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4959:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4960:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6411:%.*]] = fcmp ogt double [[TMP4959]], [[TMP4960]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6411]], label [[IF_THEN6413:%.*]], label [[IF_END6414:%.*]] +// SIMD-ONLY0: if.then6413: +// SIMD-ONLY0-NEXT: [[TMP4961:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4961]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6414]] +// SIMD-ONLY0: if.end6414: +// SIMD-ONLY0-NEXT: [[TMP4962:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4962]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4963:%.*]] = load double, ptr [[DX]], align 8 
+// SIMD-ONLY0-NEXT: [[TMP4964:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6415:%.*]] = fcmp ogt double [[TMP4963]], [[TMP4964]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6415]], label [[IF_THEN6417:%.*]], label [[IF_END6418:%.*]] +// SIMD-ONLY0: if.then6417: +// SIMD-ONLY0-NEXT: [[TMP4965:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4965]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6418]] +// SIMD-ONLY0: if.end6418: +// SIMD-ONLY0-NEXT: [[TMP4966:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4966]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4967:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4968:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6419:%.*]] = fcmp olt double [[TMP4967]], [[TMP4968]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6419]], label [[IF_THEN6421:%.*]], label [[IF_END6422:%.*]] +// SIMD-ONLY0: if.then6421: +// SIMD-ONLY0-NEXT: [[TMP4969:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4969]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6422]] +// SIMD-ONLY0: if.end6422: +// SIMD-ONLY0-NEXT: [[TMP4970:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4970]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4971:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4972:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6423:%.*]] = fcmp olt double [[TMP4971]], [[TMP4972]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6423]], label [[IF_THEN6425:%.*]], label [[IF_END6426:%.*]] +// SIMD-ONLY0: if.then6425: +// SIMD-ONLY0-NEXT: [[TMP4973:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4973]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6426]] +// SIMD-ONLY0: if.end6426: +// SIMD-ONLY0-NEXT: [[TMP4974:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4974]], ptr 
[[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4975:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4976:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6427:%.*]] = fcmp oeq double [[TMP4975]], [[TMP4976]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6427]], label [[IF_THEN6429:%.*]], label [[IF_END6430:%.*]] +// SIMD-ONLY0: if.then6429: +// SIMD-ONLY0-NEXT: [[TMP4977:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4977]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6430]] +// SIMD-ONLY0: if.end6430: +// SIMD-ONLY0-NEXT: [[TMP4978:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4978]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4979:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4980:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6431:%.*]] = fcmp oeq double [[TMP4979]], [[TMP4980]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6431]], label [[IF_THEN6433:%.*]], label [[IF_END6434:%.*]] +// SIMD-ONLY0: if.then6433: +// SIMD-ONLY0-NEXT: [[TMP4981:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4981]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6434]] +// SIMD-ONLY0: if.end6434: +// SIMD-ONLY0-NEXT: [[TMP4982:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4983:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6435:%.*]] = fcmp ogt double [[TMP4982]], [[TMP4983]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6435]], label [[IF_THEN6437:%.*]], label [[IF_END6438:%.*]] +// SIMD-ONLY0: if.then6437: +// SIMD-ONLY0-NEXT: [[TMP4984:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4984]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6438]] +// SIMD-ONLY0: if.end6438: +// SIMD-ONLY0-NEXT: [[TMP4985:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4985]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP4986:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4987:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6439:%.*]] = fcmp ogt double [[TMP4986]], [[TMP4987]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6439]], label [[IF_THEN6441:%.*]], label [[IF_END6442:%.*]] +// SIMD-ONLY0: if.then6441: +// SIMD-ONLY0-NEXT: [[TMP4988:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4988]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6442]] +// SIMD-ONLY0: if.end6442: +// SIMD-ONLY0-NEXT: [[TMP4989:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4989]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4990:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4991:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6443:%.*]] = fcmp olt double [[TMP4990]], [[TMP4991]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6443]], label [[IF_THEN6445:%.*]], label [[IF_END6446:%.*]] +// SIMD-ONLY0: if.then6445: +// SIMD-ONLY0-NEXT: [[TMP4992:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4992]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6446]] +// SIMD-ONLY0: if.end6446: +// SIMD-ONLY0-NEXT: [[TMP4993:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4993]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4994:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4995:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6447:%.*]] = fcmp olt double [[TMP4994]], [[TMP4995]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6447]], label [[IF_THEN6449:%.*]], label [[IF_END6450:%.*]] +// SIMD-ONLY0: if.then6449: +// SIMD-ONLY0-NEXT: [[TMP4996:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP4996]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6450]] +// SIMD-ONLY0: if.end6450: +// SIMD-ONLY0-NEXT: [[TMP4997:%.*]] = load double, ptr [[DX]], align 8 
+// SIMD-ONLY0-NEXT: store double [[TMP4997]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4998:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4999:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6451:%.*]] = fcmp oeq double [[TMP4998]], [[TMP4999]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6451]], label [[IF_THEN6453:%.*]], label [[IF_END6454:%.*]] +// SIMD-ONLY0: if.then6453: +// SIMD-ONLY0-NEXT: [[TMP5000:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5000]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6454]] +// SIMD-ONLY0: if.end6454: +// SIMD-ONLY0-NEXT: [[TMP5001:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5001]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5002:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5003:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6455:%.*]] = fcmp oeq double [[TMP5002]], [[TMP5003]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6455]], label [[IF_THEN6457:%.*]], label [[IF_END6458:%.*]] +// SIMD-ONLY0: if.then6457: +// SIMD-ONLY0-NEXT: [[TMP5004:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5004]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6458]] +// SIMD-ONLY0: if.end6458: +// SIMD-ONLY0-NEXT: [[TMP5005:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5005]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5006:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5007:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6459:%.*]] = fcmp oeq double [[TMP5006]], [[TMP5007]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6459]], label [[IF_THEN6461:%.*]], label [[IF_ELSE6462:%.*]] +// SIMD-ONLY0: if.then6461: +// SIMD-ONLY0-NEXT: [[TMP5008:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5008]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6463:%.*]] +// 
SIMD-ONLY0: if.else6462: +// SIMD-ONLY0-NEXT: [[TMP5009:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5009]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6463]] +// SIMD-ONLY0: if.end6463: +// SIMD-ONLY0-NEXT: [[TMP5010:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5011:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6464:%.*]] = fcmp oeq double [[TMP5010]], [[TMP5011]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6464]], label [[IF_THEN6466:%.*]], label [[IF_ELSE6467:%.*]] +// SIMD-ONLY0: if.then6466: +// SIMD-ONLY0-NEXT: [[TMP5012:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5012]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6468:%.*]] +// SIMD-ONLY0: if.else6467: +// SIMD-ONLY0-NEXT: [[TMP5013:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5013]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6468]] +// SIMD-ONLY0: if.end6468: +// SIMD-ONLY0-NEXT: [[TMP5014:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5015:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6469:%.*]] = fcmp oeq double [[TMP5014]], [[TMP5015]] +// SIMD-ONLY0-NEXT: [[CONV6470:%.*]] = zext i1 [[CMP6469]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6470]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5016:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6471:%.*]] = icmp ne i32 [[TMP5016]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6471]], label [[IF_THEN6472:%.*]], label [[IF_END6473:%.*]] +// SIMD-ONLY0: if.then6472: +// SIMD-ONLY0-NEXT: [[TMP5017:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5017]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6473]] +// SIMD-ONLY0: if.end6473: +// SIMD-ONLY0-NEXT: [[TMP5018:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5019:%.*]] = load double, ptr [[DX]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP6474:%.*]] = fcmp oeq double [[TMP5018]], [[TMP5019]] +// SIMD-ONLY0-NEXT: [[CONV6475:%.*]] = zext i1 [[CMP6474]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6475]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5020:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6476:%.*]] = icmp ne i32 [[TMP5020]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6476]], label [[IF_THEN6477:%.*]], label [[IF_END6478:%.*]] +// SIMD-ONLY0: if.then6477: +// SIMD-ONLY0-NEXT: [[TMP5021:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5021]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6478]] +// SIMD-ONLY0: if.end6478: +// SIMD-ONLY0-NEXT: [[TMP5022:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5023:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6479:%.*]] = fcmp oeq double [[TMP5022]], [[TMP5023]] +// SIMD-ONLY0-NEXT: [[CONV6480:%.*]] = zext i1 [[CMP6479]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6480]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5024:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6481:%.*]] = icmp ne i32 [[TMP5024]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6481]], label [[IF_THEN6482:%.*]], label [[IF_ELSE6483:%.*]] +// SIMD-ONLY0: if.then6482: +// SIMD-ONLY0-NEXT: [[TMP5025:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5025]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6484:%.*]] +// SIMD-ONLY0: if.else6483: +// SIMD-ONLY0-NEXT: [[TMP5026:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5026]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6484]] +// SIMD-ONLY0: if.end6484: +// SIMD-ONLY0-NEXT: [[TMP5027:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5028:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6485:%.*]] = fcmp oeq double [[TMP5027]], [[TMP5028]] +// SIMD-ONLY0-NEXT: [[CONV6486:%.*]] = zext 
i1 [[CMP6485]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6486]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5029:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6487:%.*]] = icmp ne i32 [[TMP5029]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6487]], label [[IF_THEN6488:%.*]], label [[IF_ELSE6489:%.*]] +// SIMD-ONLY0: if.then6488: +// SIMD-ONLY0-NEXT: [[TMP5030:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5030]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6490:%.*]] +// SIMD-ONLY0: if.else6489: +// SIMD-ONLY0-NEXT: [[TMP5031:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5031]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6490]] +// SIMD-ONLY0: if.end6490: +// SIMD-ONLY0-NEXT: [[TMP5032:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5032]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5033:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5034:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6491:%.*]] = fcmp ogt double [[TMP5033]], [[TMP5034]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6491]], label [[IF_THEN6493:%.*]], label [[IF_END6494:%.*]] +// SIMD-ONLY0: if.then6493: +// SIMD-ONLY0-NEXT: [[TMP5035:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5035]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6494]] +// SIMD-ONLY0: if.end6494: +// SIMD-ONLY0-NEXT: [[TMP5036:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5036]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5037:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5038:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6495:%.*]] = fcmp ogt double [[TMP5037]], [[TMP5038]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6495]], label [[IF_THEN6497:%.*]], label [[IF_END6498:%.*]] +// SIMD-ONLY0: if.then6497: +// SIMD-ONLY0-NEXT: [[TMP5039:%.*]] = 
load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5039]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6498]] +// SIMD-ONLY0: if.end6498: +// SIMD-ONLY0-NEXT: [[TMP5040:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5040]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5041:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5042:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6499:%.*]] = fcmp olt double [[TMP5041]], [[TMP5042]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6499]], label [[IF_THEN6501:%.*]], label [[IF_END6502:%.*]] +// SIMD-ONLY0: if.then6501: +// SIMD-ONLY0-NEXT: [[TMP5043:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5043]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6502]] +// SIMD-ONLY0: if.end6502: +// SIMD-ONLY0-NEXT: [[TMP5044:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5044]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5045:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5046:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6503:%.*]] = fcmp olt double [[TMP5045]], [[TMP5046]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6503]], label [[IF_THEN6505:%.*]], label [[IF_END6506:%.*]] +// SIMD-ONLY0: if.then6505: +// SIMD-ONLY0-NEXT: [[TMP5047:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5047]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6506]] +// SIMD-ONLY0: if.end6506: +// SIMD-ONLY0-NEXT: [[TMP5048:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5048]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5049:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5050:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6507:%.*]] = fcmp oeq double [[TMP5049]], [[TMP5050]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6507]], label [[IF_THEN6509:%.*]], 
label [[IF_END6510:%.*]] +// SIMD-ONLY0: if.then6509: +// SIMD-ONLY0-NEXT: [[TMP5051:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5051]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6510]] +// SIMD-ONLY0: if.end6510: +// SIMD-ONLY0-NEXT: [[TMP5052:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5052]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5053:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5054:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6511:%.*]] = fcmp oeq double [[TMP5053]], [[TMP5054]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6511]], label [[IF_THEN6513:%.*]], label [[IF_END6514:%.*]] +// SIMD-ONLY0: if.then6513: +// SIMD-ONLY0-NEXT: [[TMP5055:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5055]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6514]] +// SIMD-ONLY0: if.end6514: +// SIMD-ONLY0-NEXT: [[TMP5056:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5057:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6515:%.*]] = fcmp ogt double [[TMP5056]], [[TMP5057]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6515]], label [[IF_THEN6517:%.*]], label [[IF_END6518:%.*]] +// SIMD-ONLY0: if.then6517: +// SIMD-ONLY0-NEXT: [[TMP5058:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5058]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6518]] +// SIMD-ONLY0: if.end6518: +// SIMD-ONLY0-NEXT: [[TMP5059:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5059]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5060:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5061:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6519:%.*]] = fcmp ogt double [[TMP5060]], [[TMP5061]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6519]], label [[IF_THEN6521:%.*]], label [[IF_END6522:%.*]] +// SIMD-ONLY0: 
if.then6521: +// SIMD-ONLY0-NEXT: [[TMP5062:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5062]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6522]] +// SIMD-ONLY0: if.end6522: +// SIMD-ONLY0-NEXT: [[TMP5063:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5063]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5064:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5065:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6523:%.*]] = fcmp olt double [[TMP5064]], [[TMP5065]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6523]], label [[IF_THEN6525:%.*]], label [[IF_END6526:%.*]] +// SIMD-ONLY0: if.then6525: +// SIMD-ONLY0-NEXT: [[TMP5066:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5066]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6526]] +// SIMD-ONLY0: if.end6526: +// SIMD-ONLY0-NEXT: [[TMP5067:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5067]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5068:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5069:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6527:%.*]] = fcmp olt double [[TMP5068]], [[TMP5069]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6527]], label [[IF_THEN6529:%.*]], label [[IF_END6530:%.*]] +// SIMD-ONLY0: if.then6529: +// SIMD-ONLY0-NEXT: [[TMP5070:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5070]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6530]] +// SIMD-ONLY0: if.end6530: +// SIMD-ONLY0-NEXT: [[TMP5071:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5071]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5072:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5073:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6531:%.*]] = fcmp oeq double [[TMP5072]], [[TMP5073]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP6531]], label [[IF_THEN6533:%.*]], label [[IF_END6534:%.*]] +// SIMD-ONLY0: if.then6533: +// SIMD-ONLY0-NEXT: [[TMP5074:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5074]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6534]] +// SIMD-ONLY0: if.end6534: +// SIMD-ONLY0-NEXT: [[TMP5075:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5075]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5076:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5077:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6535:%.*]] = fcmp oeq double [[TMP5076]], [[TMP5077]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6535]], label [[IF_THEN6537:%.*]], label [[IF_END6538:%.*]] +// SIMD-ONLY0: if.then6537: +// SIMD-ONLY0-NEXT: [[TMP5078:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5078]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6538]] +// SIMD-ONLY0: if.end6538: +// SIMD-ONLY0-NEXT: [[TMP5079:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5079]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5080:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5081:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6539:%.*]] = fcmp oeq double [[TMP5080]], [[TMP5081]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6539]], label [[IF_THEN6541:%.*]], label [[IF_ELSE6542:%.*]] +// SIMD-ONLY0: if.then6541: +// SIMD-ONLY0-NEXT: [[TMP5082:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5082]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6543:%.*]] +// SIMD-ONLY0: if.else6542: +// SIMD-ONLY0-NEXT: [[TMP5083:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5083]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6543]] +// SIMD-ONLY0: if.end6543: +// SIMD-ONLY0-NEXT: [[TMP5084:%.*]] = load double, ptr 
[[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5085:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6544:%.*]] = fcmp oeq double [[TMP5084]], [[TMP5085]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6544]], label [[IF_THEN6546:%.*]], label [[IF_ELSE6547:%.*]] +// SIMD-ONLY0: if.then6546: +// SIMD-ONLY0-NEXT: [[TMP5086:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5086]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6548:%.*]] +// SIMD-ONLY0: if.else6547: +// SIMD-ONLY0-NEXT: [[TMP5087:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5087]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6548]] +// SIMD-ONLY0: if.end6548: +// SIMD-ONLY0-NEXT: [[TMP5088:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5089:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6549:%.*]] = fcmp oeq double [[TMP5088]], [[TMP5089]] +// SIMD-ONLY0-NEXT: [[CONV6550:%.*]] = zext i1 [[CMP6549]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6550]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5090:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6551:%.*]] = icmp ne i32 [[TMP5090]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6551]], label [[IF_THEN6552:%.*]], label [[IF_END6553:%.*]] +// SIMD-ONLY0: if.then6552: +// SIMD-ONLY0-NEXT: [[TMP5091:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5091]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6553]] +// SIMD-ONLY0: if.end6553: +// SIMD-ONLY0-NEXT: [[TMP5092:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5093:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6554:%.*]] = fcmp oeq double [[TMP5092]], [[TMP5093]] +// SIMD-ONLY0-NEXT: [[CONV6555:%.*]] = zext i1 [[CMP6554]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6555]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5094:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: 
[[TOBOOL6556:%.*]] = icmp ne i32 [[TMP5094]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6556]], label [[IF_THEN6557:%.*]], label [[IF_END6558:%.*]] +// SIMD-ONLY0: if.then6557: +// SIMD-ONLY0-NEXT: [[TMP5095:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5095]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6558]] +// SIMD-ONLY0: if.end6558: +// SIMD-ONLY0-NEXT: [[TMP5096:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5097:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6559:%.*]] = fcmp oeq double [[TMP5096]], [[TMP5097]] +// SIMD-ONLY0-NEXT: [[CONV6560:%.*]] = zext i1 [[CMP6559]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6560]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5098:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6561:%.*]] = icmp ne i32 [[TMP5098]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6561]], label [[IF_THEN6562:%.*]], label [[IF_ELSE6563:%.*]] +// SIMD-ONLY0: if.then6562: +// SIMD-ONLY0-NEXT: [[TMP5099:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5099]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6564:%.*]] +// SIMD-ONLY0: if.else6563: +// SIMD-ONLY0-NEXT: [[TMP5100:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5100]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6564]] +// SIMD-ONLY0: if.end6564: +// SIMD-ONLY0-NEXT: [[TMP5101:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5102:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6565:%.*]] = fcmp oeq double [[TMP5101]], [[TMP5102]] +// SIMD-ONLY0-NEXT: [[CONV6566:%.*]] = zext i1 [[CMP6565]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6566]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5103:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6567:%.*]] = icmp ne i32 [[TMP5103]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6567]], label [[IF_THEN6568:%.*]], 
label [[IF_ELSE6569:%.*]] +// SIMD-ONLY0: if.then6568: +// SIMD-ONLY0-NEXT: [[TMP5104:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5104]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6570:%.*]] +// SIMD-ONLY0: if.else6569: +// SIMD-ONLY0-NEXT: [[TMP5105:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5105]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6570]] +// SIMD-ONLY0: if.end6570: +// SIMD-ONLY0-NEXT: [[TMP5106:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5106]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5107:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5108:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6571:%.*]] = fcmp ogt double [[TMP5107]], [[TMP5108]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6571]], label [[IF_THEN6573:%.*]], label [[IF_END6574:%.*]] +// SIMD-ONLY0: if.then6573: +// SIMD-ONLY0-NEXT: [[TMP5109:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5109]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6574]] +// SIMD-ONLY0: if.end6574: +// SIMD-ONLY0-NEXT: [[TMP5110:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5110]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5111:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5112:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6575:%.*]] = fcmp ogt double [[TMP5111]], [[TMP5112]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6575]], label [[IF_THEN6577:%.*]], label [[IF_END6578:%.*]] +// SIMD-ONLY0: if.then6577: +// SIMD-ONLY0-NEXT: [[TMP5113:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5113]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6578]] +// SIMD-ONLY0: if.end6578: +// SIMD-ONLY0-NEXT: [[TMP5114:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5114]], ptr 
[[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5115:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5116:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6579:%.*]] = fcmp olt double [[TMP5115]], [[TMP5116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6579]], label [[IF_THEN6581:%.*]], label [[IF_END6582:%.*]] +// SIMD-ONLY0: if.then6581: +// SIMD-ONLY0-NEXT: [[TMP5117:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5117]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6582]] +// SIMD-ONLY0: if.end6582: +// SIMD-ONLY0-NEXT: [[TMP5118:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5118]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5119:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5120:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6583:%.*]] = fcmp olt double [[TMP5119]], [[TMP5120]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6583]], label [[IF_THEN6585:%.*]], label [[IF_END6586:%.*]] +// SIMD-ONLY0: if.then6585: +// SIMD-ONLY0-NEXT: [[TMP5121:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5121]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6586]] +// SIMD-ONLY0: if.end6586: +// SIMD-ONLY0-NEXT: [[TMP5122:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5122]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5123:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5124:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6587:%.*]] = fcmp oeq double [[TMP5123]], [[TMP5124]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6587]], label [[IF_THEN6589:%.*]], label [[IF_END6590:%.*]] +// SIMD-ONLY0: if.then6589: +// SIMD-ONLY0-NEXT: [[TMP5125:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5125]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6590]] +// SIMD-ONLY0: if.end6590: +// SIMD-ONLY0-NEXT: 
[[TMP5126:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5126]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5127:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5128:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6591:%.*]] = fcmp oeq double [[TMP5127]], [[TMP5128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6591]], label [[IF_THEN6593:%.*]], label [[IF_END6594:%.*]] +// SIMD-ONLY0: if.then6593: +// SIMD-ONLY0-NEXT: [[TMP5129:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5129]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6594]] +// SIMD-ONLY0: if.end6594: +// SIMD-ONLY0-NEXT: [[TMP5130:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5131:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6595:%.*]] = fcmp ogt double [[TMP5130]], [[TMP5131]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6595]], label [[IF_THEN6597:%.*]], label [[IF_END6598:%.*]] +// SIMD-ONLY0: if.then6597: +// SIMD-ONLY0-NEXT: [[TMP5132:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5132]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6598]] +// SIMD-ONLY0: if.end6598: +// SIMD-ONLY0-NEXT: [[TMP5133:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5133]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5134:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5135:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6599:%.*]] = fcmp ogt double [[TMP5134]], [[TMP5135]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6599]], label [[IF_THEN6601:%.*]], label [[IF_END6602:%.*]] +// SIMD-ONLY0: if.then6601: +// SIMD-ONLY0-NEXT: [[TMP5136:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5136]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6602]] +// SIMD-ONLY0: if.end6602: +// SIMD-ONLY0-NEXT: [[TMP5137:%.*]] = load double, ptr [[DX]], align 8 
+// SIMD-ONLY0-NEXT: store double [[TMP5137]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5138:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5139:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6603:%.*]] = fcmp olt double [[TMP5138]], [[TMP5139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6603]], label [[IF_THEN6605:%.*]], label [[IF_END6606:%.*]] +// SIMD-ONLY0: if.then6605: +// SIMD-ONLY0-NEXT: [[TMP5140:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5140]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6606]] +// SIMD-ONLY0: if.end6606: +// SIMD-ONLY0-NEXT: [[TMP5141:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5141]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5142:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5143:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6607:%.*]] = fcmp olt double [[TMP5142]], [[TMP5143]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6607]], label [[IF_THEN6609:%.*]], label [[IF_END6610:%.*]] +// SIMD-ONLY0: if.then6609: +// SIMD-ONLY0-NEXT: [[TMP5144:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5144]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6610]] +// SIMD-ONLY0: if.end6610: +// SIMD-ONLY0-NEXT: [[TMP5145:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5145]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5146:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5147:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6611:%.*]] = fcmp oeq double [[TMP5146]], [[TMP5147]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6611]], label [[IF_THEN6613:%.*]], label [[IF_END6614:%.*]] +// SIMD-ONLY0: if.then6613: +// SIMD-ONLY0-NEXT: [[TMP5148:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5148]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6614]] +// 
SIMD-ONLY0: if.end6614: +// SIMD-ONLY0-NEXT: [[TMP5149:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5149]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5150:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5151:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6615:%.*]] = fcmp oeq double [[TMP5150]], [[TMP5151]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6615]], label [[IF_THEN6617:%.*]], label [[IF_END6618:%.*]] +// SIMD-ONLY0: if.then6617: +// SIMD-ONLY0-NEXT: [[TMP5152:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5152]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6618]] +// SIMD-ONLY0: if.end6618: +// SIMD-ONLY0-NEXT: [[TMP5153:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5153]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5154:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5155:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6619:%.*]] = fcmp oeq double [[TMP5154]], [[TMP5155]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6619]], label [[IF_THEN6621:%.*]], label [[IF_ELSE6622:%.*]] +// SIMD-ONLY0: if.then6621: +// SIMD-ONLY0-NEXT: [[TMP5156:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5156]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6623:%.*]] +// SIMD-ONLY0: if.else6622: +// SIMD-ONLY0-NEXT: [[TMP5157:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5157]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6623]] +// SIMD-ONLY0: if.end6623: +// SIMD-ONLY0-NEXT: [[TMP5158:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5159:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6624:%.*]] = fcmp oeq double [[TMP5158]], [[TMP5159]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6624]], label [[IF_THEN6626:%.*]], label [[IF_ELSE6627:%.*]] +// SIMD-ONLY0: if.then6626: +// 
SIMD-ONLY0-NEXT: [[TMP5160:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5160]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6628:%.*]] +// SIMD-ONLY0: if.else6627: +// SIMD-ONLY0-NEXT: [[TMP5161:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5161]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6628]] +// SIMD-ONLY0: if.end6628: +// SIMD-ONLY0-NEXT: [[TMP5162:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5163:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6629:%.*]] = fcmp oeq double [[TMP5162]], [[TMP5163]] +// SIMD-ONLY0-NEXT: [[CONV6630:%.*]] = zext i1 [[CMP6629]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6630]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5164:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6631:%.*]] = icmp ne i32 [[TMP5164]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6631]], label [[IF_THEN6632:%.*]], label [[IF_END6633:%.*]] +// SIMD-ONLY0: if.then6632: +// SIMD-ONLY0-NEXT: [[TMP5165:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5165]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6633]] +// SIMD-ONLY0: if.end6633: +// SIMD-ONLY0-NEXT: [[TMP5166:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5167:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6634:%.*]] = fcmp oeq double [[TMP5166]], [[TMP5167]] +// SIMD-ONLY0-NEXT: [[CONV6635:%.*]] = zext i1 [[CMP6634]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6635]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5168:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6636:%.*]] = icmp ne i32 [[TMP5168]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6636]], label [[IF_THEN6637:%.*]], label [[IF_END6638:%.*]] +// SIMD-ONLY0: if.then6637: +// SIMD-ONLY0-NEXT: [[TMP5169:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5169]], ptr 
[[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6638]] +// SIMD-ONLY0: if.end6638: +// SIMD-ONLY0-NEXT: [[TMP5170:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5171:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6639:%.*]] = fcmp oeq double [[TMP5170]], [[TMP5171]] +// SIMD-ONLY0-NEXT: [[CONV6640:%.*]] = zext i1 [[CMP6639]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6640]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5172:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6641:%.*]] = icmp ne i32 [[TMP5172]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6641]], label [[IF_THEN6642:%.*]], label [[IF_ELSE6643:%.*]] +// SIMD-ONLY0: if.then6642: +// SIMD-ONLY0-NEXT: [[TMP5173:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5173]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6644:%.*]] +// SIMD-ONLY0: if.else6643: +// SIMD-ONLY0-NEXT: [[TMP5174:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5174]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6644]] +// SIMD-ONLY0: if.end6644: +// SIMD-ONLY0-NEXT: [[TMP5175:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5176:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6645:%.*]] = fcmp oeq double [[TMP5175]], [[TMP5176]] +// SIMD-ONLY0-NEXT: [[CONV6646:%.*]] = zext i1 [[CMP6645]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6646]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5177:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6647:%.*]] = icmp ne i32 [[TMP5177]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6647]], label [[IF_THEN6648:%.*]], label [[IF_ELSE6649:%.*]] +// SIMD-ONLY0: if.then6648: +// SIMD-ONLY0-NEXT: [[TMP5178:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5178]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6650:%.*]] +// SIMD-ONLY0: if.else6649: +// SIMD-ONLY0-NEXT: 
[[TMP5179:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5179]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6650]] +// SIMD-ONLY0: if.end6650: +// SIMD-ONLY0-NEXT: [[TMP5180:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5180]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5181:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5182:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6651:%.*]] = fcmp ogt double [[TMP5181]], [[TMP5182]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6651]], label [[IF_THEN6653:%.*]], label [[IF_END6654:%.*]] +// SIMD-ONLY0: if.then6653: +// SIMD-ONLY0-NEXT: [[TMP5183:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5183]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6654]] +// SIMD-ONLY0: if.end6654: +// SIMD-ONLY0-NEXT: [[TMP5184:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5184]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5185:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5186:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6655:%.*]] = fcmp ogt double [[TMP5185]], [[TMP5186]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6655]], label [[IF_THEN6657:%.*]], label [[IF_END6658:%.*]] +// SIMD-ONLY0: if.then6657: +// SIMD-ONLY0-NEXT: [[TMP5187:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5187]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6658]] +// SIMD-ONLY0: if.end6658: +// SIMD-ONLY0-NEXT: [[TMP5188:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5188]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5189:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5190:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6659:%.*]] = fcmp olt double [[TMP5189]], [[TMP5190]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6659]], label 
[[IF_THEN6661:%.*]], label [[IF_END6662:%.*]] +// SIMD-ONLY0: if.then6661: +// SIMD-ONLY0-NEXT: [[TMP5191:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5191]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6662]] +// SIMD-ONLY0: if.end6662: +// SIMD-ONLY0-NEXT: [[TMP5192:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5192]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5193:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5194:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6663:%.*]] = fcmp olt double [[TMP5193]], [[TMP5194]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6663]], label [[IF_THEN6665:%.*]], label [[IF_END6666:%.*]] +// SIMD-ONLY0: if.then6665: +// SIMD-ONLY0-NEXT: [[TMP5195:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5195]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6666]] +// SIMD-ONLY0: if.end6666: +// SIMD-ONLY0-NEXT: [[TMP5196:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5196]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5197:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5198:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6667:%.*]] = fcmp oeq double [[TMP5197]], [[TMP5198]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6667]], label [[IF_THEN6669:%.*]], label [[IF_END6670:%.*]] +// SIMD-ONLY0: if.then6669: +// SIMD-ONLY0-NEXT: [[TMP5199:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5199]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6670]] +// SIMD-ONLY0: if.end6670: +// SIMD-ONLY0-NEXT: [[TMP5200:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5200]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5201:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5202:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6671:%.*]] 
= fcmp oeq double [[TMP5201]], [[TMP5202]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6671]], label [[IF_THEN6673:%.*]], label [[IF_END6674:%.*]] +// SIMD-ONLY0: if.then6673: +// SIMD-ONLY0-NEXT: [[TMP5203:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5203]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6674]] +// SIMD-ONLY0: if.end6674: +// SIMD-ONLY0-NEXT: [[TMP5204:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5205:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6675:%.*]] = fcmp ogt double [[TMP5204]], [[TMP5205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6675]], label [[IF_THEN6677:%.*]], label [[IF_END6678:%.*]] +// SIMD-ONLY0: if.then6677: +// SIMD-ONLY0-NEXT: [[TMP5206:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5206]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6678]] +// SIMD-ONLY0: if.end6678: +// SIMD-ONLY0-NEXT: [[TMP5207:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5207]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5208:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5209:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6679:%.*]] = fcmp ogt double [[TMP5208]], [[TMP5209]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6679]], label [[IF_THEN6681:%.*]], label [[IF_END6682:%.*]] +// SIMD-ONLY0: if.then6681: +// SIMD-ONLY0-NEXT: [[TMP5210:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5210]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6682]] +// SIMD-ONLY0: if.end6682: +// SIMD-ONLY0-NEXT: [[TMP5211:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5211]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5212:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5213:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6683:%.*]] = fcmp olt double [[TMP5212]], [[TMP5213]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP6683]], label [[IF_THEN6685:%.*]], label [[IF_END6686:%.*]] +// SIMD-ONLY0: if.then6685: +// SIMD-ONLY0-NEXT: [[TMP5214:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5214]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6686]] +// SIMD-ONLY0: if.end6686: +// SIMD-ONLY0-NEXT: [[TMP5215:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5215]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5216:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5217:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6687:%.*]] = fcmp olt double [[TMP5216]], [[TMP5217]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6687]], label [[IF_THEN6689:%.*]], label [[IF_END6690:%.*]] +// SIMD-ONLY0: if.then6689: +// SIMD-ONLY0-NEXT: [[TMP5218:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5218]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6690]] +// SIMD-ONLY0: if.end6690: +// SIMD-ONLY0-NEXT: [[TMP5219:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5219]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5220:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5221:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6691:%.*]] = fcmp oeq double [[TMP5220]], [[TMP5221]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6691]], label [[IF_THEN6693:%.*]], label [[IF_END6694:%.*]] +// SIMD-ONLY0: if.then6693: +// SIMD-ONLY0-NEXT: [[TMP5222:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5222]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6694]] +// SIMD-ONLY0: if.end6694: +// SIMD-ONLY0-NEXT: [[TMP5223:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5223]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5224:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5225:%.*]] = load double, ptr [[DX]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP6695:%.*]] = fcmp oeq double [[TMP5224]], [[TMP5225]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6695]], label [[IF_THEN6697:%.*]], label [[IF_END6698:%.*]] +// SIMD-ONLY0: if.then6697: +// SIMD-ONLY0-NEXT: [[TMP5226:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5226]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6698]] +// SIMD-ONLY0: if.end6698: +// SIMD-ONLY0-NEXT: [[TMP5227:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5227]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5228:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5229:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6699:%.*]] = fcmp oeq double [[TMP5228]], [[TMP5229]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6699]], label [[IF_THEN6701:%.*]], label [[IF_ELSE6702:%.*]] +// SIMD-ONLY0: if.then6701: +// SIMD-ONLY0-NEXT: [[TMP5230:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5230]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6703:%.*]] +// SIMD-ONLY0: if.else6702: +// SIMD-ONLY0-NEXT: [[TMP5231:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5231]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6703]] +// SIMD-ONLY0: if.end6703: +// SIMD-ONLY0-NEXT: [[TMP5232:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5233:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6704:%.*]] = fcmp oeq double [[TMP5232]], [[TMP5233]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6704]], label [[IF_THEN6706:%.*]], label [[IF_ELSE6707:%.*]] +// SIMD-ONLY0: if.then6706: +// SIMD-ONLY0-NEXT: [[TMP5234:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5234]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6708:%.*]] +// SIMD-ONLY0: if.else6707: +// SIMD-ONLY0-NEXT: [[TMP5235:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store 
double [[TMP5235]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6708]] +// SIMD-ONLY0: if.end6708: +// SIMD-ONLY0-NEXT: [[TMP5236:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5237:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6709:%.*]] = fcmp oeq double [[TMP5236]], [[TMP5237]] +// SIMD-ONLY0-NEXT: [[CONV6710:%.*]] = zext i1 [[CMP6709]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6710]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5238:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6711:%.*]] = icmp ne i32 [[TMP5238]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6711]], label [[IF_THEN6712:%.*]], label [[IF_END6713:%.*]] +// SIMD-ONLY0: if.then6712: +// SIMD-ONLY0-NEXT: [[TMP5239:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5239]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6713]] +// SIMD-ONLY0: if.end6713: +// SIMD-ONLY0-NEXT: [[TMP5240:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5241:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6714:%.*]] = fcmp oeq double [[TMP5240]], [[TMP5241]] +// SIMD-ONLY0-NEXT: [[CONV6715:%.*]] = zext i1 [[CMP6714]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6715]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5242:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6716:%.*]] = icmp ne i32 [[TMP5242]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6716]], label [[IF_THEN6717:%.*]], label [[IF_END6718:%.*]] +// SIMD-ONLY0: if.then6717: +// SIMD-ONLY0-NEXT: [[TMP5243:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5243]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6718]] +// SIMD-ONLY0: if.end6718: +// SIMD-ONLY0-NEXT: [[TMP5244:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5245:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6719:%.*]] = fcmp oeq double [[TMP5244]], [[TMP5245]] 
+// SIMD-ONLY0-NEXT: [[CONV6720:%.*]] = zext i1 [[CMP6719]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6720]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5246:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6721:%.*]] = icmp ne i32 [[TMP5246]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6721]], label [[IF_THEN6722:%.*]], label [[IF_ELSE6723:%.*]] +// SIMD-ONLY0: if.then6722: +// SIMD-ONLY0-NEXT: [[TMP5247:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5247]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6724:%.*]] +// SIMD-ONLY0: if.else6723: +// SIMD-ONLY0-NEXT: [[TMP5248:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5248]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6724]] +// SIMD-ONLY0: if.end6724: +// SIMD-ONLY0-NEXT: [[TMP5249:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5250:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6725:%.*]] = fcmp oeq double [[TMP5249]], [[TMP5250]] +// SIMD-ONLY0-NEXT: [[CONV6726:%.*]] = zext i1 [[CMP6725]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6726]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5251:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6727:%.*]] = icmp ne i32 [[TMP5251]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6727]], label [[IF_THEN6728:%.*]], label [[IF_ELSE6729:%.*]] +// SIMD-ONLY0: if.then6728: +// SIMD-ONLY0-NEXT: [[TMP5252:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5252]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6730:%.*]] +// SIMD-ONLY0: if.else6729: +// SIMD-ONLY0-NEXT: [[TMP5253:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5253]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6730]] +// SIMD-ONLY0: if.end6730: +// SIMD-ONLY0-NEXT: [[TMP5254:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5254]], ptr 
[[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5255:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5256:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6731:%.*]] = fcmp ogt double [[TMP5255]], [[TMP5256]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6731]], label [[IF_THEN6733:%.*]], label [[IF_END6734:%.*]] +// SIMD-ONLY0: if.then6733: +// SIMD-ONLY0-NEXT: [[TMP5257:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5257]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6734]] +// SIMD-ONLY0: if.end6734: +// SIMD-ONLY0-NEXT: [[TMP5258:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5258]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5259:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5260:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6735:%.*]] = fcmp ogt double [[TMP5259]], [[TMP5260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6735]], label [[IF_THEN6737:%.*]], label [[IF_END6738:%.*]] +// SIMD-ONLY0: if.then6737: +// SIMD-ONLY0-NEXT: [[TMP5261:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5261]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6738]] +// SIMD-ONLY0: if.end6738: +// SIMD-ONLY0-NEXT: [[TMP5262:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5262]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5263:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5264:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6739:%.*]] = fcmp olt double [[TMP5263]], [[TMP5264]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6739]], label [[IF_THEN6741:%.*]], label [[IF_END6742:%.*]] +// SIMD-ONLY0: if.then6741: +// SIMD-ONLY0-NEXT: [[TMP5265:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5265]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6742]] +// SIMD-ONLY0: if.end6742: +// SIMD-ONLY0-NEXT: 
[[TMP5266:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5266]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5267:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5268:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6743:%.*]] = fcmp olt double [[TMP5267]], [[TMP5268]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6743]], label [[IF_THEN6745:%.*]], label [[IF_END6746:%.*]] +// SIMD-ONLY0: if.then6745: +// SIMD-ONLY0-NEXT: [[TMP5269:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5269]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6746]] +// SIMD-ONLY0: if.end6746: +// SIMD-ONLY0-NEXT: [[TMP5270:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5270]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5271:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5272:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6747:%.*]] = fcmp oeq double [[TMP5271]], [[TMP5272]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6747]], label [[IF_THEN6749:%.*]], label [[IF_END6750:%.*]] +// SIMD-ONLY0: if.then6749: +// SIMD-ONLY0-NEXT: [[TMP5273:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5273]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6750]] +// SIMD-ONLY0: if.end6750: +// SIMD-ONLY0-NEXT: [[TMP5274:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5274]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5275:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5276:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6751:%.*]] = fcmp oeq double [[TMP5275]], [[TMP5276]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6751]], label [[IF_THEN6753:%.*]], label [[IF_END6754:%.*]] +// SIMD-ONLY0: if.then6753: +// SIMD-ONLY0-NEXT: [[TMP5277:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5277]], ptr [[DX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[IF_END6754]] +// SIMD-ONLY0: if.end6754: +// SIMD-ONLY0-NEXT: [[TMP5278:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5279:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6755:%.*]] = fcmp ogt double [[TMP5278]], [[TMP5279]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6755]], label [[IF_THEN6757:%.*]], label [[IF_END6758:%.*]] +// SIMD-ONLY0: if.then6757: +// SIMD-ONLY0-NEXT: [[TMP5280:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5280]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6758]] +// SIMD-ONLY0: if.end6758: +// SIMD-ONLY0-NEXT: [[TMP5281:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5281]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5282:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5283:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6759:%.*]] = fcmp ogt double [[TMP5282]], [[TMP5283]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6759]], label [[IF_THEN6761:%.*]], label [[IF_END6762:%.*]] +// SIMD-ONLY0: if.then6761: +// SIMD-ONLY0-NEXT: [[TMP5284:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5284]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6762]] +// SIMD-ONLY0: if.end6762: +// SIMD-ONLY0-NEXT: [[TMP5285:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5285]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5286:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5287:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6763:%.*]] = fcmp olt double [[TMP5286]], [[TMP5287]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6763]], label [[IF_THEN6765:%.*]], label [[IF_END6766:%.*]] +// SIMD-ONLY0: if.then6765: +// SIMD-ONLY0-NEXT: [[TMP5288:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5288]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6766]] +// 
SIMD-ONLY0: if.end6766: +// SIMD-ONLY0-NEXT: [[TMP5289:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5289]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5290:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5291:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6767:%.*]] = fcmp olt double [[TMP5290]], [[TMP5291]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6767]], label [[IF_THEN6769:%.*]], label [[IF_END6770:%.*]] +// SIMD-ONLY0: if.then6769: +// SIMD-ONLY0-NEXT: [[TMP5292:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5292]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6770]] +// SIMD-ONLY0: if.end6770: +// SIMD-ONLY0-NEXT: [[TMP5293:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5293]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5294:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5295:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6771:%.*]] = fcmp oeq double [[TMP5294]], [[TMP5295]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6771]], label [[IF_THEN6773:%.*]], label [[IF_END6774:%.*]] +// SIMD-ONLY0: if.then6773: +// SIMD-ONLY0-NEXT: [[TMP5296:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5296]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6774]] +// SIMD-ONLY0: if.end6774: +// SIMD-ONLY0-NEXT: [[TMP5297:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5297]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5298:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5299:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6775:%.*]] = fcmp oeq double [[TMP5298]], [[TMP5299]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6775]], label [[IF_THEN6777:%.*]], label [[IF_END6778:%.*]] +// SIMD-ONLY0: if.then6777: +// SIMD-ONLY0-NEXT: [[TMP5300:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store 
double [[TMP5300]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6778]] +// SIMD-ONLY0: if.end6778: +// SIMD-ONLY0-NEXT: [[TMP5301:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5301]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5302:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5303:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6779:%.*]] = fcmp oeq double [[TMP5302]], [[TMP5303]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6779]], label [[IF_THEN6781:%.*]], label [[IF_ELSE6782:%.*]] +// SIMD-ONLY0: if.then6781: +// SIMD-ONLY0-NEXT: [[TMP5304:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5304]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6783:%.*]] +// SIMD-ONLY0: if.else6782: +// SIMD-ONLY0-NEXT: [[TMP5305:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5305]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6783]] +// SIMD-ONLY0: if.end6783: +// SIMD-ONLY0-NEXT: [[TMP5306:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5307:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6784:%.*]] = fcmp oeq double [[TMP5306]], [[TMP5307]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6784]], label [[IF_THEN6786:%.*]], label [[IF_ELSE6787:%.*]] +// SIMD-ONLY0: if.then6786: +// SIMD-ONLY0-NEXT: [[TMP5308:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5308]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6788:%.*]] +// SIMD-ONLY0: if.else6787: +// SIMD-ONLY0-NEXT: [[TMP5309:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5309]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6788]] +// SIMD-ONLY0: if.end6788: +// SIMD-ONLY0-NEXT: [[TMP5310:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5311:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6789:%.*]] = fcmp oeq 
double [[TMP5310]], [[TMP5311]] +// SIMD-ONLY0-NEXT: [[CONV6790:%.*]] = zext i1 [[CMP6789]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6790]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5312:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6791:%.*]] = icmp ne i32 [[TMP5312]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6791]], label [[IF_THEN6792:%.*]], label [[IF_END6793:%.*]] +// SIMD-ONLY0: if.then6792: +// SIMD-ONLY0-NEXT: [[TMP5313:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5313]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6793]] +// SIMD-ONLY0: if.end6793: +// SIMD-ONLY0-NEXT: [[TMP5314:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5315:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6794:%.*]] = fcmp oeq double [[TMP5314]], [[TMP5315]] +// SIMD-ONLY0-NEXT: [[CONV6795:%.*]] = zext i1 [[CMP6794]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6795]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5316:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6796:%.*]] = icmp ne i32 [[TMP5316]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6796]], label [[IF_THEN6797:%.*]], label [[IF_END6798:%.*]] +// SIMD-ONLY0: if.then6797: +// SIMD-ONLY0-NEXT: [[TMP5317:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5317]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6798]] +// SIMD-ONLY0: if.end6798: +// SIMD-ONLY0-NEXT: [[TMP5318:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5319:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6799:%.*]] = fcmp oeq double [[TMP5318]], [[TMP5319]] +// SIMD-ONLY0-NEXT: [[CONV6800:%.*]] = zext i1 [[CMP6799]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6800]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5320:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6801:%.*]] = icmp ne i32 [[TMP5320]], 0 +// SIMD-ONLY0-NEXT: br i1 
[[TOBOOL6801]], label [[IF_THEN6802:%.*]], label [[IF_ELSE6803:%.*]] +// SIMD-ONLY0: if.then6802: +// SIMD-ONLY0-NEXT: [[TMP5321:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5321]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6804:%.*]] +// SIMD-ONLY0: if.else6803: +// SIMD-ONLY0-NEXT: [[TMP5322:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5322]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6804]] +// SIMD-ONLY0: if.end6804: +// SIMD-ONLY0-NEXT: [[TMP5323:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5324:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6805:%.*]] = fcmp oeq double [[TMP5323]], [[TMP5324]] +// SIMD-ONLY0-NEXT: [[CONV6806:%.*]] = zext i1 [[CMP6805]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV6806]], ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5325:%.*]] = load i32, ptr [[IR]], align 4 +// SIMD-ONLY0-NEXT: [[TOBOOL6807:%.*]] = icmp ne i32 [[TMP5325]], 0 +// SIMD-ONLY0-NEXT: br i1 [[TOBOOL6807]], label [[IF_THEN6808:%.*]], label [[IF_ELSE6809:%.*]] +// SIMD-ONLY0: if.then6808: +// SIMD-ONLY0-NEXT: [[TMP5326:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5326]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6810:%.*]] +// SIMD-ONLY0: if.else6809: +// SIMD-ONLY0-NEXT: [[TMP5327:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5327]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: br label [[IF_END6810]] +// SIMD-ONLY0: if.end6810: +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: @cxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[CX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CV:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[CD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP0]], ptr [[CV]], 
align 1 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i8 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: [[CONV5:%.*]] = trunc i32 [[COND]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV5]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP5]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV6:%.*]] = sext i8 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV7:%.*]] = sext i8 [[TMP7]] to i32 +// SIMD-ONLY0-NEXT: [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]] +// SIMD-ONLY0: cond.true10: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sext i8 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false12: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV13:%.*]] = sext i8 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ] +// SIMD-ONLY0-NEXT: [[CONV16:%.*]] = trunc i32 [[COND15]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV16]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP10]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV17:%.*]] = sext i8 [[TMP11]] to i32 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV18:%.*]] = sext i8 [[TMP12]] to i32 +// SIMD-ONLY0-NEXT: [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]] +// SIMD-ONLY0-NEXT: br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true21: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = sext i8 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV24:%.*]] = sext i8 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25]] +// SIMD-ONLY0: cond.end25: +// SIMD-ONLY0-NEXT: [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = trunc i32 [[COND26]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV27]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = sext i8 [[TMP15]] to i32 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV29:%.*]] = sext i8 [[TMP16]] to i32 +// SIMD-ONLY0-NEXT: [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]] +// SIMD-ONLY0-NEXT: br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = 
load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = sext i8 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36:%.*]] +// SIMD-ONLY0: cond.false34: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV35:%.*]] = sext i8 [[TMP18]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36]] +// SIMD-ONLY0: cond.end36: +// SIMD-ONLY0-NEXT: [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ] +// SIMD-ONLY0-NEXT: [[CONV38:%.*]] = trunc i32 [[COND37]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV38]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP19]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = sext i8 [[TMP20]] to i32 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = sext i8 [[TMP21]] to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]] +// SIMD-ONLY0: cond.true43: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV44:%.*]] = sext i8 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47:%.*]] +// SIMD-ONLY0: cond.false45: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV46:%.*]] = sext i8 [[TMP23]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47]] +// SIMD-ONLY0: cond.end47: +// SIMD-ONLY0-NEXT: [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ] +// SIMD-ONLY0-NEXT: [[CONV49:%.*]] = trunc i32 [[COND48]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV49]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP24]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: 
[[TMP25:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV50:%.*]] = sext i8 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV51:%.*]] = sext i8 [[TMP26]] to i32 +// SIMD-ONLY0-NEXT: [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]] +// SIMD-ONLY0: cond.true54: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV55:%.*]] = sext i8 [[TMP27]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58:%.*]] +// SIMD-ONLY0: cond.false56: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV57:%.*]] = sext i8 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58]] +// SIMD-ONLY0: cond.end58: +// SIMD-ONLY0-NEXT: [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ] +// SIMD-ONLY0-NEXT: [[CONV60:%.*]] = trunc i32 [[COND59]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV60]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP29]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP30]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV61:%.*]] = sext i8 [[TMP31]] to i32 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV62:%.*]] = sext i8 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]] +// SIMD-ONLY0: cond.true65: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV66:%.*]] = sext i8 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false67: +// 
SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV68:%.*]] = sext i8 [[TMP34]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ] +// SIMD-ONLY0-NEXT: [[CONV71:%.*]] = trunc i32 [[COND70]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV71]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP35]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV72:%.*]] = sext i8 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV73:%.*]] = sext i8 [[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]] +// SIMD-ONLY0-NEXT: br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true76: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV77:%.*]] = sext i8 [[TMP38]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV79:%.*]] = sext i8 [[TMP39]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80]] +// SIMD-ONLY0: cond.end80: +// SIMD-ONLY0-NEXT: [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = trunc i32 [[COND81]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV82]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP40]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = sext i8 [[TMP41]] to i32 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i8, ptr [[CE]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV84:%.*]] = sext i8 [[TMP42]] to i32 +// SIMD-ONLY0-NEXT: [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]] +// SIMD-ONLY0-NEXT: br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV88:%.*]] = sext i8 [[TMP43]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91:%.*]] +// SIMD-ONLY0: cond.false89: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV90:%.*]] = sext i8 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91]] +// SIMD-ONLY0: cond.end91: +// SIMD-ONLY0-NEXT: [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ] +// SIMD-ONLY0-NEXT: [[CONV93:%.*]] = trunc i32 [[COND92]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV93]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV94:%.*]] = sext i8 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV95:%.*]] = sext i8 [[TMP46]] to i32 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]] +// SIMD-ONLY0: cond.true98: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV99:%.*]] = sext i8 [[TMP47]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102:%.*]] +// SIMD-ONLY0: cond.false100: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV101:%.*]] = sext i8 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102]] +// SIMD-ONLY0: cond.end102: +// SIMD-ONLY0-NEXT: [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ] +// SIMD-ONLY0-NEXT: [[CONV104:%.*]] = trunc i32 [[COND103]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV104]], ptr 
[[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP49]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV105:%.*]] = sext i8 [[TMP50]] to i32 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = sext i8 [[TMP51]] to i32 +// SIMD-ONLY0-NEXT: [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]] +// SIMD-ONLY0: cond.true109: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV110:%.*]] = sext i8 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113:%.*]] +// SIMD-ONLY0: cond.false111: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV112:%.*]] = sext i8 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113]] +// SIMD-ONLY0: cond.end113: +// SIMD-ONLY0-NEXT: [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ] +// SIMD-ONLY0-NEXT: [[CONV115:%.*]] = trunc i32 [[COND114]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV115]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP54]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = sext i8 [[TMP55]] to i32 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV117:%.*]] = sext i8 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]] +// SIMD-ONLY0: cond.true120: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV121:%.*]] = sext i8 [[TMP57]] to i32 +// 
SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false122: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV123:%.*]] = sext i8 [[TMP58]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ] +// SIMD-ONLY0-NEXT: [[CONV126:%.*]] = trunc i32 [[COND125]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV126]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP59]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP60]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = sext i8 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = sext i8 [[TMP62]] to i32 +// SIMD-ONLY0-NEXT: [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true131: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV132:%.*]] = sext i8 [[TMP63]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV134:%.*]] = sext i8 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135]] +// SIMD-ONLY0: cond.end135: +// SIMD-ONLY0-NEXT: [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: [[CONV137:%.*]] = trunc i32 [[COND136]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV137]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store 
i8 [[TMP65]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV138:%.*]] = sext i8 [[TMP66]] to i32 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = sext i8 [[TMP67]] to i32 +// SIMD-ONLY0-NEXT: [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV143:%.*]] = sext i8 [[TMP68]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146:%.*]] +// SIMD-ONLY0: cond.false144: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = sext i8 [[TMP69]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146]] +// SIMD-ONLY0: cond.end146: +// SIMD-ONLY0-NEXT: [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ] +// SIMD-ONLY0-NEXT: [[CONV148:%.*]] = trunc i32 [[COND147]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV148]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP70]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV149:%.*]] = sext i8 [[TMP71]] to i32 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV150:%.*]] = sext i8 [[TMP72]] to i32 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]] +// SIMD-ONLY0: cond.true153: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV154:%.*]] = sext i8 [[TMP73]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157:%.*]] +// SIMD-ONLY0: cond.false155: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load 
i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV156:%.*]] = sext i8 [[TMP74]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157]] +// SIMD-ONLY0: cond.end157: +// SIMD-ONLY0-NEXT: [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ] +// SIMD-ONLY0-NEXT: [[CONV159:%.*]] = trunc i32 [[COND158]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV159]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV160:%.*]] = sext i8 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV161:%.*]] = sext i8 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]] +// SIMD-ONLY0-NEXT: br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]] +// SIMD-ONLY0: cond.true164: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV165:%.*]] = sext i8 [[TMP77]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168:%.*]] +// SIMD-ONLY0: cond.false166: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV167:%.*]] = sext i8 [[TMP78]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168]] +// SIMD-ONLY0: cond.end168: +// SIMD-ONLY0-NEXT: [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ] +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = trunc i32 [[COND169]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV170]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP79]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV171:%.*]] = sext i8 [[TMP80]] to i32 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV172:%.*]] = sext i8 [[TMP81]] to i32 +// SIMD-ONLY0-NEXT: [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]] 
+// SIMD-ONLY0-NEXT: br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]] +// SIMD-ONLY0: cond.true175: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV176:%.*]] = sext i8 [[TMP82]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179:%.*]] +// SIMD-ONLY0: cond.false177: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV178:%.*]] = sext i8 [[TMP83]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179]] +// SIMD-ONLY0: cond.end179: +// SIMD-ONLY0-NEXT: [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ] +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] = trunc i32 [[COND180]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV181]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP84]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV182:%.*]] = sext i8 [[TMP85]] to i32 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV183:%.*]] = sext i8 [[TMP86]] to i32 +// SIMD-ONLY0-NEXT: [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]] +// SIMD-ONLY0-NEXT: br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]] +// SIMD-ONLY0: cond.true186: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV187:%.*]] = sext i8 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190:%.*]] +// SIMD-ONLY0: cond.false188: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV189:%.*]] = sext i8 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190]] +// SIMD-ONLY0: cond.end190: +// SIMD-ONLY0-NEXT: [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ] +// SIMD-ONLY0-NEXT: [[CONV192:%.*]] = trunc i32 [[COND191]] to i8 +// SIMD-ONLY0-NEXT: 
store i8 [[CONV192]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP89]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP90]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV193:%.*]] = sext i8 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV194:%.*]] = sext i8 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]] +// SIMD-ONLY0-NEXT: br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]] +// SIMD-ONLY0: cond.true197: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV198:%.*]] = sext i8 [[TMP93]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201:%.*]] +// SIMD-ONLY0: cond.false199: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV200:%.*]] = sext i8 [[TMP94]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201]] +// SIMD-ONLY0: cond.end201: +// SIMD-ONLY0-NEXT: [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ] +// SIMD-ONLY0-NEXT: [[CONV203:%.*]] = trunc i32 [[COND202]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV203]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP95]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = sext i8 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV205:%.*]] = sext i8 [[TMP97]] to i32 +// SIMD-ONLY0-NEXT: [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]] +// SIMD-ONLY0: 
cond.true208: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = sext i8 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212:%.*]] +// SIMD-ONLY0: cond.false210: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV211:%.*]] = sext i8 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212]] +// SIMD-ONLY0: cond.end212: +// SIMD-ONLY0-NEXT: [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ] +// SIMD-ONLY0-NEXT: [[CONV214:%.*]] = trunc i32 [[COND213]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV214]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP100]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV215:%.*]] = sext i8 [[TMP101]] to i32 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV216:%.*]] = sext i8 [[TMP102]] to i32 +// SIMD-ONLY0-NEXT: [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]] +// SIMD-ONLY0: cond.true219: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = sext i8 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223:%.*]] +// SIMD-ONLY0: cond.false221: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV222:%.*]] = sext i8 [[TMP104]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223]] +// SIMD-ONLY0: cond.end223: +// SIMD-ONLY0-NEXT: [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ] +// SIMD-ONLY0-NEXT: [[CONV225:%.*]] = trunc i32 [[COND224]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV225]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i8, ptr [[CX]], 
align 1 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = sext i8 [[TMP105]] to i32 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV227:%.*]] = sext i8 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]] +// SIMD-ONLY0-NEXT: br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]] +// SIMD-ONLY0: cond.true230: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV231:%.*]] = sext i8 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234:%.*]] +// SIMD-ONLY0: cond.false232: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV233:%.*]] = sext i8 [[TMP108]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234]] +// SIMD-ONLY0: cond.end234: +// SIMD-ONLY0-NEXT: [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ] +// SIMD-ONLY0-NEXT: [[CONV236:%.*]] = trunc i32 [[COND235]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV236]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP109]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV237:%.*]] = sext i8 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV238:%.*]] = sext i8 [[TMP111]] to i32 +// SIMD-ONLY0-NEXT: [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]] +// SIMD-ONLY0: cond.true241: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = sext i8 [[TMP112]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245:%.*]] +// SIMD-ONLY0: cond.false243: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV244:%.*]] = sext i8 
[[TMP113]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245]] +// SIMD-ONLY0: cond.end245: +// SIMD-ONLY0-NEXT: [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ] +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = trunc i32 [[COND246]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV247]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP114]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV248:%.*]] = sext i8 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV249:%.*]] = sext i8 [[TMP116]] to i32 +// SIMD-ONLY0-NEXT: [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]] +// SIMD-ONLY0: cond.true252: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = sext i8 [[TMP117]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256:%.*]] +// SIMD-ONLY0: cond.false254: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV255:%.*]] = sext i8 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256]] +// SIMD-ONLY0: cond.end256: +// SIMD-ONLY0-NEXT: [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ] +// SIMD-ONLY0-NEXT: [[CONV258:%.*]] = trunc i32 [[COND257]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV258]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP119]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP120]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = sext i8 [[TMP121]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = sext i8 [[TMP122]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]] +// SIMD-ONLY0: cond.true263: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV264:%.*]] = sext i8 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267:%.*]] +// SIMD-ONLY0: cond.false265: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = sext i8 [[TMP124]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267]] +// SIMD-ONLY0: cond.end267: +// SIMD-ONLY0-NEXT: [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ] +// SIMD-ONLY0-NEXT: [[CONV269:%.*]] = trunc i32 [[COND268]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV269]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP125]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV270:%.*]] = sext i8 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV271:%.*]] = sext i8 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]] +// SIMD-ONLY0: cond.true274: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV275:%.*]] = sext i8 [[TMP128]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278:%.*]] +// SIMD-ONLY0: cond.false276: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = sext i8 [[TMP129]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278]] +// SIMD-ONLY0: 
cond.end278: +// SIMD-ONLY0-NEXT: [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ] +// SIMD-ONLY0-NEXT: [[CONV280:%.*]] = trunc i32 [[COND279]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV280]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP130]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV281:%.*]] = sext i8 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV282:%.*]] = sext i8 [[TMP132]] to i32 +// SIMD-ONLY0-NEXT: [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]] +// SIMD-ONLY0: cond.true285: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV286:%.*]] = sext i8 [[TMP133]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289:%.*]] +// SIMD-ONLY0: cond.false287: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV288:%.*]] = sext i8 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289]] +// SIMD-ONLY0: cond.end289: +// SIMD-ONLY0-NEXT: [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ] +// SIMD-ONLY0-NEXT: [[CONV291:%.*]] = trunc i32 [[COND290]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV291]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV292:%.*]] = sext i8 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV293:%.*]] = sext i8 [[TMP136]] to i32 +// SIMD-ONLY0-NEXT: [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]] +// SIMD-ONLY0: cond.true296: +// 
SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV297:%.*]] = sext i8 [[TMP137]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300:%.*]] +// SIMD-ONLY0: cond.false298: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV299:%.*]] = sext i8 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300]] +// SIMD-ONLY0: cond.end300: +// SIMD-ONLY0-NEXT: [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ] +// SIMD-ONLY0-NEXT: [[CONV302:%.*]] = trunc i32 [[COND301]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV302]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP139]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV303:%.*]] = sext i8 [[TMP140]] to i32 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV304:%.*]] = sext i8 [[TMP141]] to i32 +// SIMD-ONLY0-NEXT: [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]] +// SIMD-ONLY0-NEXT: br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]] +// SIMD-ONLY0: cond.true307: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV308:%.*]] = sext i8 [[TMP142]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311:%.*]] +// SIMD-ONLY0: cond.false309: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV310:%.*]] = sext i8 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311]] +// SIMD-ONLY0: cond.end311: +// SIMD-ONLY0-NEXT: [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ] +// SIMD-ONLY0-NEXT: [[CONV313:%.*]] = trunc i32 [[COND312]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV313]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i8, ptr [[CX]], align 1 +// 
SIMD-ONLY0-NEXT: store i8 [[TMP144]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV314:%.*]] = sext i8 [[TMP145]] to i32 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = sext i8 [[TMP146]] to i32 +// SIMD-ONLY0-NEXT: [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]] +// SIMD-ONLY0: cond.true318: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV319:%.*]] = sext i8 [[TMP147]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322:%.*]] +// SIMD-ONLY0: cond.false320: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = sext i8 [[TMP148]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322]] +// SIMD-ONLY0: cond.end322: +// SIMD-ONLY0-NEXT: [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ] +// SIMD-ONLY0-NEXT: [[CONV324:%.*]] = trunc i32 [[COND323]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV324]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP149]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP150]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV325:%.*]] = sext i8 [[TMP151]] to i32 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV326:%.*]] = sext i8 [[TMP152]] to i32 +// SIMD-ONLY0-NEXT: [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]] +// SIMD-ONLY0-NEXT: br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]] +// SIMD-ONLY0: cond.true329: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: 
[[CONV330:%.*]] = sext i8 [[TMP153]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333:%.*]] +// SIMD-ONLY0: cond.false331: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = sext i8 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333]] +// SIMD-ONLY0: cond.end333: +// SIMD-ONLY0-NEXT: [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ] +// SIMD-ONLY0-NEXT: [[CONV335:%.*]] = trunc i32 [[COND334]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV335]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP155]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV336:%.*]] = sext i8 [[TMP156]] to i32 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = sext i8 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]] +// SIMD-ONLY0-NEXT: br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]] +// SIMD-ONLY0: cond.true340: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV341:%.*]] = sext i8 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344:%.*]] +// SIMD-ONLY0: cond.false342: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV343:%.*]] = sext i8 [[TMP159]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344]] +// SIMD-ONLY0: cond.end344: +// SIMD-ONLY0-NEXT: [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ] +// SIMD-ONLY0-NEXT: [[CONV346:%.*]] = trunc i32 [[COND345]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV346]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP160]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: 
[[TMP161:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV347:%.*]] = sext i8 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV348:%.*]] = sext i8 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]] +// SIMD-ONLY0-NEXT: br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]] +// SIMD-ONLY0: cond.true351: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV352:%.*]] = sext i8 [[TMP163]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355:%.*]] +// SIMD-ONLY0: cond.false353: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV354:%.*]] = sext i8 [[TMP164]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355]] +// SIMD-ONLY0: cond.end355: +// SIMD-ONLY0-NEXT: [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ] +// SIMD-ONLY0-NEXT: [[CONV357:%.*]] = trunc i32 [[COND356]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV357]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV358:%.*]] = sext i8 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV359:%.*]] = sext i8 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]] +// SIMD-ONLY0-NEXT: br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]] +// SIMD-ONLY0: cond.true362: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV363:%.*]] = sext i8 [[TMP167]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366:%.*]] +// SIMD-ONLY0: cond.false364: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV365:%.*]] = sext i8 [[TMP168]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366]] +// SIMD-ONLY0: cond.end366: +// 
SIMD-ONLY0-NEXT: [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ] +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = trunc i32 [[COND367]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV368]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP169]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV369:%.*]] = sext i8 [[TMP170]] to i32 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV370:%.*]] = sext i8 [[TMP171]] to i32 +// SIMD-ONLY0-NEXT: [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]] +// SIMD-ONLY0: cond.true373: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = sext i8 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377:%.*]] +// SIMD-ONLY0: cond.false375: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV376:%.*]] = sext i8 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377]] +// SIMD-ONLY0: cond.end377: +// SIMD-ONLY0-NEXT: [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ] +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = trunc i32 [[COND378]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV379]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP174]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV380:%.*]] = sext i8 [[TMP175]] to i32 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i8, ptr [[CE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV381:%.*]] = sext i8 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]] +// SIMD-ONLY0: cond.true384: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i8, ptr [[CD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = sext i8 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388:%.*]] +// SIMD-ONLY0: cond.false386: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV387:%.*]] = sext i8 [[TMP178]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388]] +// SIMD-ONLY0: cond.end388: +// SIMD-ONLY0-NEXT: [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ] +// SIMD-ONLY0-NEXT: [[CONV390:%.*]] = trunc i32 [[COND389]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV390]], ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i8, ptr [[CX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP179]], ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i8, ptr [[CV]], align 1 +// SIMD-ONLY0-NEXT: ret i8 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @ucxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[UCX:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCV:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCE:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[UCD:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP0]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = zext i8 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = zext i8 [[TMP3]] to i32 +// SIMD-ONLY0-NEXT: br 
label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = zext i8 [[TMP4]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: [[CONV5:%.*]] = trunc i32 [[COND]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV5]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP5]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV6:%.*]] = zext i8 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV7:%.*]] = zext i8 [[TMP7]] to i32 +// SIMD-ONLY0-NEXT: [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]] +// SIMD-ONLY0: cond.true10: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = zext i8 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false12: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV13:%.*]] = zext i8 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ] +// SIMD-ONLY0-NEXT: [[CONV16:%.*]] = trunc i32 [[COND15]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV16]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP10]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV17:%.*]] = zext i8 [[TMP11]] to i32 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i8, 
ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV18:%.*]] = zext i8 [[TMP12]] to i32 +// SIMD-ONLY0-NEXT: [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]] +// SIMD-ONLY0-NEXT: br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true21: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = zext i8 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV24:%.*]] = zext i8 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25]] +// SIMD-ONLY0: cond.end25: +// SIMD-ONLY0-NEXT: [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = trunc i32 [[COND26]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV27]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = zext i8 [[TMP15]] to i32 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV29:%.*]] = zext i8 [[TMP16]] to i32 +// SIMD-ONLY0-NEXT: [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]] +// SIMD-ONLY0-NEXT: br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = zext i8 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36:%.*]] +// SIMD-ONLY0: cond.false34: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV35:%.*]] = zext i8 [[TMP18]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36]] +// SIMD-ONLY0: cond.end36: +// SIMD-ONLY0-NEXT: [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ] +// SIMD-ONLY0-NEXT: [[CONV38:%.*]] = trunc i32 [[COND37]] to i8 +// SIMD-ONLY0-NEXT: store 
i8 [[CONV38]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP19]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = zext i8 [[TMP20]] to i32 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = zext i8 [[TMP21]] to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]] +// SIMD-ONLY0: cond.true43: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV44:%.*]] = zext i8 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47:%.*]] +// SIMD-ONLY0: cond.false45: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV46:%.*]] = zext i8 [[TMP23]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47]] +// SIMD-ONLY0: cond.end47: +// SIMD-ONLY0-NEXT: [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ] +// SIMD-ONLY0-NEXT: [[CONV49:%.*]] = trunc i32 [[COND48]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV49]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP24]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV50:%.*]] = zext i8 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV51:%.*]] = zext i8 [[TMP26]] to i32 +// SIMD-ONLY0-NEXT: [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]] +// SIMD-ONLY0: cond.true54: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV55:%.*]] = zext i8 [[TMP27]] to i32 +// 
SIMD-ONLY0-NEXT: br label [[COND_END58:%.*]] +// SIMD-ONLY0: cond.false56: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV57:%.*]] = zext i8 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58]] +// SIMD-ONLY0: cond.end58: +// SIMD-ONLY0-NEXT: [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ] +// SIMD-ONLY0-NEXT: [[CONV60:%.*]] = trunc i32 [[COND59]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV60]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP29]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP30]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV61:%.*]] = zext i8 [[TMP31]] to i32 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV62:%.*]] = zext i8 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]] +// SIMD-ONLY0: cond.true65: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV66:%.*]] = zext i8 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false67: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV68:%.*]] = zext i8 [[TMP34]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ] +// SIMD-ONLY0-NEXT: [[CONV71:%.*]] = trunc i32 [[COND70]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV71]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP35]], ptr 
[[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV72:%.*]] = zext i8 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV73:%.*]] = zext i8 [[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]] +// SIMD-ONLY0-NEXT: br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true76: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV77:%.*]] = zext i8 [[TMP38]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV79:%.*]] = zext i8 [[TMP39]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80]] +// SIMD-ONLY0: cond.end80: +// SIMD-ONLY0-NEXT: [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = trunc i32 [[COND81]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV82]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP40]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = zext i8 [[TMP41]] to i32 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV84:%.*]] = zext i8 [[TMP42]] to i32 +// SIMD-ONLY0-NEXT: [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]] +// SIMD-ONLY0-NEXT: br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV88:%.*]] = zext i8 [[TMP43]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91:%.*]] +// SIMD-ONLY0: cond.false89: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i8, ptr [[UCX]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV90:%.*]] = zext i8 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91]] +// SIMD-ONLY0: cond.end91: +// SIMD-ONLY0-NEXT: [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ] +// SIMD-ONLY0-NEXT: [[CONV93:%.*]] = trunc i32 [[COND92]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV93]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV94:%.*]] = zext i8 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV95:%.*]] = zext i8 [[TMP46]] to i32 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]] +// SIMD-ONLY0: cond.true98: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV99:%.*]] = zext i8 [[TMP47]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102:%.*]] +// SIMD-ONLY0: cond.false100: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV101:%.*]] = zext i8 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102]] +// SIMD-ONLY0: cond.end102: +// SIMD-ONLY0-NEXT: [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ] +// SIMD-ONLY0-NEXT: [[CONV104:%.*]] = trunc i32 [[COND103]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV104]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP49]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV105:%.*]] = zext i8 [[TMP50]] to i32 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = zext i8 [[TMP51]] to i32 +// SIMD-ONLY0-NEXT: [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP107]], 
label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]] +// SIMD-ONLY0: cond.true109: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV110:%.*]] = zext i8 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113:%.*]] +// SIMD-ONLY0: cond.false111: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV112:%.*]] = zext i8 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113]] +// SIMD-ONLY0: cond.end113: +// SIMD-ONLY0-NEXT: [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ] +// SIMD-ONLY0-NEXT: [[CONV115:%.*]] = trunc i32 [[COND114]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV115]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP54]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = zext i8 [[TMP55]] to i32 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV117:%.*]] = zext i8 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]] +// SIMD-ONLY0: cond.true120: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV121:%.*]] = zext i8 [[TMP57]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false122: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV123:%.*]] = zext i8 [[TMP58]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ] +// SIMD-ONLY0-NEXT: [[CONV126:%.*]] = trunc i32 [[COND125]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV126]], ptr 
[[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP59]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP60]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = zext i8 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = zext i8 [[TMP62]] to i32 +// SIMD-ONLY0-NEXT: [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true131: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV132:%.*]] = zext i8 [[TMP63]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV134:%.*]] = zext i8 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135]] +// SIMD-ONLY0: cond.end135: +// SIMD-ONLY0-NEXT: [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: [[CONV137:%.*]] = trunc i32 [[COND136]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV137]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP65]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV138:%.*]] = zext i8 [[TMP66]] to i32 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = zext i8 [[TMP67]] to i32 +// SIMD-ONLY0-NEXT: [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]] +// SIMD-ONLY0: cond.true142: +// 
SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV143:%.*]] = zext i8 [[TMP68]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146:%.*]] +// SIMD-ONLY0: cond.false144: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = zext i8 [[TMP69]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146]] +// SIMD-ONLY0: cond.end146: +// SIMD-ONLY0-NEXT: [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ] +// SIMD-ONLY0-NEXT: [[CONV148:%.*]] = trunc i32 [[COND147]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV148]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP70]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV149:%.*]] = zext i8 [[TMP71]] to i32 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV150:%.*]] = zext i8 [[TMP72]] to i32 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]] +// SIMD-ONLY0: cond.true153: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV154:%.*]] = zext i8 [[TMP73]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157:%.*]] +// SIMD-ONLY0: cond.false155: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV156:%.*]] = zext i8 [[TMP74]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157]] +// SIMD-ONLY0: cond.end157: +// SIMD-ONLY0-NEXT: [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ] +// SIMD-ONLY0-NEXT: [[CONV159:%.*]] = trunc i32 [[COND158]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV159]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i8, ptr [[UCX]], align 1 +// 
SIMD-ONLY0-NEXT: [[CONV160:%.*]] = zext i8 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV161:%.*]] = zext i8 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]] +// SIMD-ONLY0-NEXT: br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]] +// SIMD-ONLY0: cond.true164: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV165:%.*]] = zext i8 [[TMP77]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168:%.*]] +// SIMD-ONLY0: cond.false166: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV167:%.*]] = zext i8 [[TMP78]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168]] +// SIMD-ONLY0: cond.end168: +// SIMD-ONLY0-NEXT: [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ] +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = trunc i32 [[COND169]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV170]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP79]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV171:%.*]] = zext i8 [[TMP80]] to i32 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV172:%.*]] = zext i8 [[TMP81]] to i32 +// SIMD-ONLY0-NEXT: [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]] +// SIMD-ONLY0-NEXT: br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]] +// SIMD-ONLY0: cond.true175: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV176:%.*]] = zext i8 [[TMP82]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179:%.*]] +// SIMD-ONLY0: cond.false177: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV178:%.*]] = zext i8 [[TMP83]] to i32 +// 
SIMD-ONLY0-NEXT: br label [[COND_END179]] +// SIMD-ONLY0: cond.end179: +// SIMD-ONLY0-NEXT: [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ] +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] = trunc i32 [[COND180]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV181]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP84]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV182:%.*]] = zext i8 [[TMP85]] to i32 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV183:%.*]] = zext i8 [[TMP86]] to i32 +// SIMD-ONLY0-NEXT: [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]] +// SIMD-ONLY0-NEXT: br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]] +// SIMD-ONLY0: cond.true186: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV187:%.*]] = zext i8 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190:%.*]] +// SIMD-ONLY0: cond.false188: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV189:%.*]] = zext i8 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190]] +// SIMD-ONLY0: cond.end190: +// SIMD-ONLY0-NEXT: [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ] +// SIMD-ONLY0-NEXT: [[CONV192:%.*]] = trunc i32 [[COND191]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV192]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP89]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP90]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV193:%.*]] = zext i8 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] 
= load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV194:%.*]] = zext i8 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]] +// SIMD-ONLY0-NEXT: br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]] +// SIMD-ONLY0: cond.true197: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV198:%.*]] = zext i8 [[TMP93]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201:%.*]] +// SIMD-ONLY0: cond.false199: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV200:%.*]] = zext i8 [[TMP94]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201]] +// SIMD-ONLY0: cond.end201: +// SIMD-ONLY0-NEXT: [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ] +// SIMD-ONLY0-NEXT: [[CONV203:%.*]] = trunc i32 [[COND202]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV203]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP95]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = zext i8 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV205:%.*]] = zext i8 [[TMP97]] to i32 +// SIMD-ONLY0-NEXT: [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]] +// SIMD-ONLY0: cond.true208: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = zext i8 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212:%.*]] +// SIMD-ONLY0: cond.false210: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV211:%.*]] = zext i8 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212]] +// SIMD-ONLY0: cond.end212: +// SIMD-ONLY0-NEXT: 
[[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ] +// SIMD-ONLY0-NEXT: [[CONV214:%.*]] = trunc i32 [[COND213]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV214]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP100]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV215:%.*]] = zext i8 [[TMP101]] to i32 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV216:%.*]] = zext i8 [[TMP102]] to i32 +// SIMD-ONLY0-NEXT: [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]] +// SIMD-ONLY0: cond.true219: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = zext i8 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223:%.*]] +// SIMD-ONLY0: cond.false221: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV222:%.*]] = zext i8 [[TMP104]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223]] +// SIMD-ONLY0: cond.end223: +// SIMD-ONLY0-NEXT: [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ] +// SIMD-ONLY0-NEXT: [[CONV225:%.*]] = trunc i32 [[COND224]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV225]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = zext i8 [[TMP105]] to i32 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV227:%.*]] = zext i8 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]] +// SIMD-ONLY0-NEXT: br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]] +// SIMD-ONLY0: cond.true230: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = 
load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV231:%.*]] = zext i8 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234:%.*]] +// SIMD-ONLY0: cond.false232: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV233:%.*]] = zext i8 [[TMP108]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234]] +// SIMD-ONLY0: cond.end234: +// SIMD-ONLY0-NEXT: [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ] +// SIMD-ONLY0-NEXT: [[CONV236:%.*]] = trunc i32 [[COND235]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV236]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP109]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV237:%.*]] = zext i8 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV238:%.*]] = zext i8 [[TMP111]] to i32 +// SIMD-ONLY0-NEXT: [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]] +// SIMD-ONLY0: cond.true241: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = zext i8 [[TMP112]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245:%.*]] +// SIMD-ONLY0: cond.false243: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV244:%.*]] = zext i8 [[TMP113]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245]] +// SIMD-ONLY0: cond.end245: +// SIMD-ONLY0-NEXT: [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ] +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = trunc i32 [[COND246]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV247]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 
[[TMP114]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV248:%.*]] = zext i8 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV249:%.*]] = zext i8 [[TMP116]] to i32 +// SIMD-ONLY0-NEXT: [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]] +// SIMD-ONLY0: cond.true252: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = zext i8 [[TMP117]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256:%.*]] +// SIMD-ONLY0: cond.false254: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV255:%.*]] = zext i8 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256]] +// SIMD-ONLY0: cond.end256: +// SIMD-ONLY0-NEXT: [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ] +// SIMD-ONLY0-NEXT: [[CONV258:%.*]] = trunc i32 [[COND257]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV258]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP119]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP120]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = zext i8 [[TMP121]] to i32 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = zext i8 [[TMP122]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]] +// SIMD-ONLY0: cond.true263: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: 
[[CONV264:%.*]] = zext i8 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267:%.*]] +// SIMD-ONLY0: cond.false265: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = zext i8 [[TMP124]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267]] +// SIMD-ONLY0: cond.end267: +// SIMD-ONLY0-NEXT: [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ] +// SIMD-ONLY0-NEXT: [[CONV269:%.*]] = trunc i32 [[COND268]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV269]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP125]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV270:%.*]] = zext i8 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV271:%.*]] = zext i8 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]] +// SIMD-ONLY0: cond.true274: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV275:%.*]] = zext i8 [[TMP128]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278:%.*]] +// SIMD-ONLY0: cond.false276: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = zext i8 [[TMP129]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278]] +// SIMD-ONLY0: cond.end278: +// SIMD-ONLY0-NEXT: [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ] +// SIMD-ONLY0-NEXT: [[CONV280:%.*]] = trunc i32 [[COND279]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV280]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP130]], ptr [[UCV]], align 1 +// 
SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV281:%.*]] = zext i8 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV282:%.*]] = zext i8 [[TMP132]] to i32 +// SIMD-ONLY0-NEXT: [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]] +// SIMD-ONLY0: cond.true285: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV286:%.*]] = zext i8 [[TMP133]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289:%.*]] +// SIMD-ONLY0: cond.false287: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV288:%.*]] = zext i8 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289]] +// SIMD-ONLY0: cond.end289: +// SIMD-ONLY0-NEXT: [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ] +// SIMD-ONLY0-NEXT: [[CONV291:%.*]] = trunc i32 [[COND290]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV291]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV292:%.*]] = zext i8 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV293:%.*]] = zext i8 [[TMP136]] to i32 +// SIMD-ONLY0-NEXT: [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]] +// SIMD-ONLY0: cond.true296: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV297:%.*]] = zext i8 [[TMP137]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300:%.*]] +// SIMD-ONLY0: cond.false298: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV299:%.*]] = zext i8 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300]] +// 
SIMD-ONLY0: cond.end300: +// SIMD-ONLY0-NEXT: [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ] +// SIMD-ONLY0-NEXT: [[CONV302:%.*]] = trunc i32 [[COND301]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV302]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP139]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV303:%.*]] = zext i8 [[TMP140]] to i32 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV304:%.*]] = zext i8 [[TMP141]] to i32 +// SIMD-ONLY0-NEXT: [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]] +// SIMD-ONLY0-NEXT: br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]] +// SIMD-ONLY0: cond.true307: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV308:%.*]] = zext i8 [[TMP142]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311:%.*]] +// SIMD-ONLY0: cond.false309: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV310:%.*]] = zext i8 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311]] +// SIMD-ONLY0: cond.end311: +// SIMD-ONLY0-NEXT: [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ] +// SIMD-ONLY0-NEXT: [[CONV313:%.*]] = trunc i32 [[COND312]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV313]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP144]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV314:%.*]] = zext i8 [[TMP145]] to i32 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = zext i8 [[TMP146]] to i32 +// SIMD-ONLY0-NEXT: [[CMP316:%.*]] = icmp eq i32 
[[CONV314]], [[CONV315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]] +// SIMD-ONLY0: cond.true318: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV319:%.*]] = zext i8 [[TMP147]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322:%.*]] +// SIMD-ONLY0: cond.false320: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = zext i8 [[TMP148]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322]] +// SIMD-ONLY0: cond.end322: +// SIMD-ONLY0-NEXT: [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ] +// SIMD-ONLY0-NEXT: [[CONV324:%.*]] = trunc i32 [[COND323]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV324]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP149]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP150]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV325:%.*]] = zext i8 [[TMP151]] to i32 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV326:%.*]] = zext i8 [[TMP152]] to i32 +// SIMD-ONLY0-NEXT: [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]] +// SIMD-ONLY0-NEXT: br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]] +// SIMD-ONLY0: cond.true329: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV330:%.*]] = zext i8 [[TMP153]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333:%.*]] +// SIMD-ONLY0: cond.false331: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = zext i8 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333]] +// SIMD-ONLY0: cond.end333: +// SIMD-ONLY0-NEXT: 
[[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ] +// SIMD-ONLY0-NEXT: [[CONV335:%.*]] = trunc i32 [[COND334]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV335]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP155]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV336:%.*]] = zext i8 [[TMP156]] to i32 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = zext i8 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]] +// SIMD-ONLY0-NEXT: br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]] +// SIMD-ONLY0: cond.true340: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV341:%.*]] = zext i8 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344:%.*]] +// SIMD-ONLY0: cond.false342: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV343:%.*]] = zext i8 [[TMP159]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344]] +// SIMD-ONLY0: cond.end344: +// SIMD-ONLY0-NEXT: [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ] +// SIMD-ONLY0-NEXT: [[CONV346:%.*]] = trunc i32 [[COND345]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV346]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP160]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV347:%.*]] = zext i8 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV348:%.*]] = zext i8 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]] +// SIMD-ONLY0: cond.true351: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV352:%.*]] = zext i8 [[TMP163]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355:%.*]] +// SIMD-ONLY0: cond.false353: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV354:%.*]] = zext i8 [[TMP164]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355]] +// SIMD-ONLY0: cond.end355: +// SIMD-ONLY0-NEXT: [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ] +// SIMD-ONLY0-NEXT: [[CONV357:%.*]] = trunc i32 [[COND356]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV357]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV358:%.*]] = zext i8 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV359:%.*]] = zext i8 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]] +// SIMD-ONLY0-NEXT: br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]] +// SIMD-ONLY0: cond.true362: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV363:%.*]] = zext i8 [[TMP167]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366:%.*]] +// SIMD-ONLY0: cond.false364: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV365:%.*]] = zext i8 [[TMP168]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366]] +// SIMD-ONLY0: cond.end366: +// SIMD-ONLY0-NEXT: [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ] +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = trunc i32 [[COND367]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV368]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store 
i8 [[TMP169]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV369:%.*]] = zext i8 [[TMP170]] to i32 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV370:%.*]] = zext i8 [[TMP171]] to i32 +// SIMD-ONLY0-NEXT: [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]] +// SIMD-ONLY0: cond.true373: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = zext i8 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377:%.*]] +// SIMD-ONLY0: cond.false375: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV376:%.*]] = zext i8 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377]] +// SIMD-ONLY0: cond.end377: +// SIMD-ONLY0-NEXT: [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ] +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = trunc i32 [[COND378]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV379]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP174]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV380:%.*]] = zext i8 [[TMP175]] to i32 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i8, ptr [[UCE]], align 1 +// SIMD-ONLY0-NEXT: [[CONV381:%.*]] = zext i8 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]] +// SIMD-ONLY0-NEXT: br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]] +// SIMD-ONLY0: cond.true384: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i8, ptr [[UCD]], align 1 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = zext i8 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388:%.*]] +// SIMD-ONLY0: cond.false386: +// 
SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[CONV387:%.*]] = zext i8 [[TMP178]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388]] +// SIMD-ONLY0: cond.end388: +// SIMD-ONLY0-NEXT: [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ] +// SIMD-ONLY0-NEXT: [[CONV390:%.*]] = trunc i32 [[COND389]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV390]], ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i8, ptr [[UCX]], align 1 +// SIMD-ONLY0-NEXT: store i8 [[TMP179]], ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i8, ptr [[UCV]], align 1 +// SIMD-ONLY0-NEXT: ret i8 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @sxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[SX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SV:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[SD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP0]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = sext i16 [[TMP4]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], 
[[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: [[CONV5:%.*]] = trunc i32 [[COND]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV5]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP5]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV6:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV7:%.*]] = sext i16 [[TMP7]] to i32 +// SIMD-ONLY0-NEXT: [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]] +// SIMD-ONLY0: cond.true10: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false12: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV13:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ] +// SIMD-ONLY0-NEXT: [[CONV16:%.*]] = trunc i32 [[COND15]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV16]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP10]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV17:%.*]] = sext i16 [[TMP11]] to i32 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV18:%.*]] = sext i16 [[TMP12]] to i32 +// SIMD-ONLY0-NEXT: [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]] +// SIMD-ONLY0-NEXT: br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true21: +// 
SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = sext i16 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV24:%.*]] = sext i16 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25]] +// SIMD-ONLY0: cond.end25: +// SIMD-ONLY0-NEXT: [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = trunc i32 [[COND26]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV27]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = sext i16 [[TMP15]] to i32 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV29:%.*]] = sext i16 [[TMP16]] to i32 +// SIMD-ONLY0-NEXT: [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]] +// SIMD-ONLY0-NEXT: br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = sext i16 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36:%.*]] +// SIMD-ONLY0: cond.false34: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV35:%.*]] = sext i16 [[TMP18]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36]] +// SIMD-ONLY0: cond.end36: +// SIMD-ONLY0-NEXT: [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ] +// SIMD-ONLY0-NEXT: [[CONV38:%.*]] = trunc i32 [[COND37]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV38]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP19]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = 
sext i16 [[TMP20]] to i32 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = sext i16 [[TMP21]] to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]] +// SIMD-ONLY0: cond.true43: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV44:%.*]] = sext i16 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47:%.*]] +// SIMD-ONLY0: cond.false45: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV46:%.*]] = sext i16 [[TMP23]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47]] +// SIMD-ONLY0: cond.end47: +// SIMD-ONLY0-NEXT: [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ] +// SIMD-ONLY0-NEXT: [[CONV49:%.*]] = trunc i32 [[COND48]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV49]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP24]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV50:%.*]] = sext i16 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV51:%.*]] = sext i16 [[TMP26]] to i32 +// SIMD-ONLY0-NEXT: [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]] +// SIMD-ONLY0: cond.true54: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV55:%.*]] = sext i16 [[TMP27]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58:%.*]] +// SIMD-ONLY0: cond.false56: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV57:%.*]] = sext i16 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58]] +// SIMD-ONLY0: 
cond.end58: +// SIMD-ONLY0-NEXT: [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ] +// SIMD-ONLY0-NEXT: [[CONV60:%.*]] = trunc i32 [[COND59]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV60]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP29]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP30]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV61:%.*]] = sext i16 [[TMP31]] to i32 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV62:%.*]] = sext i16 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]] +// SIMD-ONLY0: cond.true65: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV66:%.*]] = sext i16 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false67: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV68:%.*]] = sext i16 [[TMP34]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ] +// SIMD-ONLY0-NEXT: [[CONV71:%.*]] = trunc i32 [[COND70]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV71]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP35]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV72:%.*]] = sext i16 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV73:%.*]] = sext i16 
[[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]] +// SIMD-ONLY0-NEXT: br i1 [[CMP74]], label [[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true76: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV77:%.*]] = sext i16 [[TMP38]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV79:%.*]] = sext i16 [[TMP39]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80]] +// SIMD-ONLY0: cond.end80: +// SIMD-ONLY0-NEXT: [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = trunc i32 [[COND81]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV82]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP40]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = sext i16 [[TMP41]] to i32 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV84:%.*]] = sext i16 [[TMP42]] to i32 +// SIMD-ONLY0-NEXT: [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]] +// SIMD-ONLY0-NEXT: br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV88:%.*]] = sext i16 [[TMP43]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91:%.*]] +// SIMD-ONLY0: cond.false89: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV90:%.*]] = sext i16 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91]] +// SIMD-ONLY0: cond.end91: +// SIMD-ONLY0-NEXT: [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ] +// SIMD-ONLY0-NEXT: 
[[CONV93:%.*]] = trunc i32 [[COND92]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV93]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV94:%.*]] = sext i16 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV95:%.*]] = sext i16 [[TMP46]] to i32 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]] +// SIMD-ONLY0: cond.true98: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV99:%.*]] = sext i16 [[TMP47]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102:%.*]] +// SIMD-ONLY0: cond.false100: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV101:%.*]] = sext i16 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102]] +// SIMD-ONLY0: cond.end102: +// SIMD-ONLY0-NEXT: [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ] +// SIMD-ONLY0-NEXT: [[CONV104:%.*]] = trunc i32 [[COND103]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV104]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP49]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV105:%.*]] = sext i16 [[TMP50]] to i32 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = sext i16 [[TMP51]] to i32 +// SIMD-ONLY0-NEXT: [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]] +// SIMD-ONLY0: cond.true109: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV110:%.*]] = sext i16 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END113:%.*]] +// SIMD-ONLY0: cond.false111: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV112:%.*]] = sext i16 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113]] +// SIMD-ONLY0: cond.end113: +// SIMD-ONLY0-NEXT: [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ] +// SIMD-ONLY0-NEXT: [[CONV115:%.*]] = trunc i32 [[COND114]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV115]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP54]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = sext i16 [[TMP55]] to i32 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV117:%.*]] = sext i16 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]] +// SIMD-ONLY0: cond.true120: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV121:%.*]] = sext i16 [[TMP57]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false122: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV123:%.*]] = sext i16 [[TMP58]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ] +// SIMD-ONLY0-NEXT: [[CONV126:%.*]] = trunc i32 [[COND125]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV126]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP59]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 
[[TMP60]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = sext i16 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = sext i16 [[TMP62]] to i32 +// SIMD-ONLY0-NEXT: [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true131: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV132:%.*]] = sext i16 [[TMP63]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV134:%.*]] = sext i16 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135]] +// SIMD-ONLY0: cond.end135: +// SIMD-ONLY0-NEXT: [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: [[CONV137:%.*]] = trunc i32 [[COND136]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV137]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP65]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV138:%.*]] = sext i16 [[TMP66]] to i32 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = sext i16 [[TMP67]] to i32 +// SIMD-ONLY0-NEXT: [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV143:%.*]] = sext i16 [[TMP68]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146:%.*]] +// SIMD-ONLY0: cond.false144: +// SIMD-ONLY0-NEXT: 
[[TMP69:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = sext i16 [[TMP69]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146]] +// SIMD-ONLY0: cond.end146: +// SIMD-ONLY0-NEXT: [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ] +// SIMD-ONLY0-NEXT: [[CONV148:%.*]] = trunc i32 [[COND147]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV148]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP70]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV149:%.*]] = sext i16 [[TMP71]] to i32 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV150:%.*]] = sext i16 [[TMP72]] to i32 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]] +// SIMD-ONLY0: cond.true153: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV154:%.*]] = sext i16 [[TMP73]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157:%.*]] +// SIMD-ONLY0: cond.false155: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV156:%.*]] = sext i16 [[TMP74]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157]] +// SIMD-ONLY0: cond.end157: +// SIMD-ONLY0-NEXT: [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ] +// SIMD-ONLY0-NEXT: [[CONV159:%.*]] = trunc i32 [[COND158]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV159]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV160:%.*]] = sext i16 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV161:%.*]] = sext i16 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: [[CMP162:%.*]] = 
icmp sgt i32 [[CONV160]], [[CONV161]] +// SIMD-ONLY0-NEXT: br i1 [[CMP162]], label [[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]] +// SIMD-ONLY0: cond.true164: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV165:%.*]] = sext i16 [[TMP77]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168:%.*]] +// SIMD-ONLY0: cond.false166: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV167:%.*]] = sext i16 [[TMP78]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168]] +// SIMD-ONLY0: cond.end168: +// SIMD-ONLY0-NEXT: [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ] +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = trunc i32 [[COND169]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV170]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP79]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV171:%.*]] = sext i16 [[TMP80]] to i32 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV172:%.*]] = sext i16 [[TMP81]] to i32 +// SIMD-ONLY0-NEXT: [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]] +// SIMD-ONLY0-NEXT: br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]] +// SIMD-ONLY0: cond.true175: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV176:%.*]] = sext i16 [[TMP82]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179:%.*]] +// SIMD-ONLY0: cond.false177: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV178:%.*]] = sext i16 [[TMP83]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179]] +// SIMD-ONLY0: cond.end179: +// SIMD-ONLY0-NEXT: [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ] +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] 
= trunc i32 [[COND180]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV181]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP84]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV182:%.*]] = sext i16 [[TMP85]] to i32 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV183:%.*]] = sext i16 [[TMP86]] to i32 +// SIMD-ONLY0-NEXT: [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]] +// SIMD-ONLY0-NEXT: br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]] +// SIMD-ONLY0: cond.true186: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV187:%.*]] = sext i16 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190:%.*]] +// SIMD-ONLY0: cond.false188: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV189:%.*]] = sext i16 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190]] +// SIMD-ONLY0: cond.end190: +// SIMD-ONLY0-NEXT: [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ] +// SIMD-ONLY0-NEXT: [[CONV192:%.*]] = trunc i32 [[COND191]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV192]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP89]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP90]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV193:%.*]] = sext i16 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV194:%.*]] = sext i16 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]] +// SIMD-ONLY0-NEXT: br i1 [[CMP195]], label 
[[COND_TRUE197:%.*]], label [[COND_FALSE199:%.*]] +// SIMD-ONLY0: cond.true197: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV198:%.*]] = sext i16 [[TMP93]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201:%.*]] +// SIMD-ONLY0: cond.false199: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV200:%.*]] = sext i16 [[TMP94]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201]] +// SIMD-ONLY0: cond.end201: +// SIMD-ONLY0-NEXT: [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ] +// SIMD-ONLY0-NEXT: [[CONV203:%.*]] = trunc i32 [[COND202]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV203]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP95]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = sext i16 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV205:%.*]] = sext i16 [[TMP97]] to i32 +// SIMD-ONLY0-NEXT: [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]] +// SIMD-ONLY0: cond.true208: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = sext i16 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212:%.*]] +// SIMD-ONLY0: cond.false210: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV211:%.*]] = sext i16 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212]] +// SIMD-ONLY0: cond.end212: +// SIMD-ONLY0-NEXT: [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ] +// SIMD-ONLY0-NEXT: [[CONV214:%.*]] = trunc i32 [[COND213]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV214]], ptr 
[[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP100]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV215:%.*]] = sext i16 [[TMP101]] to i32 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV216:%.*]] = sext i16 [[TMP102]] to i32 +// SIMD-ONLY0-NEXT: [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]] +// SIMD-ONLY0: cond.true219: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = sext i16 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223:%.*]] +// SIMD-ONLY0: cond.false221: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV222:%.*]] = sext i16 [[TMP104]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223]] +// SIMD-ONLY0: cond.end223: +// SIMD-ONLY0-NEXT: [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ] +// SIMD-ONLY0-NEXT: [[CONV225:%.*]] = trunc i32 [[COND224]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV225]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = sext i16 [[TMP105]] to i32 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV227:%.*]] = sext i16 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]] +// SIMD-ONLY0-NEXT: br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]] +// SIMD-ONLY0: cond.true230: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV231:%.*]] = sext i16 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234:%.*]] +// SIMD-ONLY0: cond.false232: +// SIMD-ONLY0-NEXT: 
[[TMP108:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV233:%.*]] = sext i16 [[TMP108]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234]] +// SIMD-ONLY0: cond.end234: +// SIMD-ONLY0-NEXT: [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ] +// SIMD-ONLY0-NEXT: [[CONV236:%.*]] = trunc i32 [[COND235]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV236]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP109]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV237:%.*]] = sext i16 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV238:%.*]] = sext i16 [[TMP111]] to i32 +// SIMD-ONLY0-NEXT: [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]] +// SIMD-ONLY0: cond.true241: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = sext i16 [[TMP112]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245:%.*]] +// SIMD-ONLY0: cond.false243: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV244:%.*]] = sext i16 [[TMP113]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245]] +// SIMD-ONLY0: cond.end245: +// SIMD-ONLY0-NEXT: [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ] +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = trunc i32 [[COND246]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV247]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP114]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV248:%.*]] = sext i16 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP116:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV249:%.*]] = sext i16 [[TMP116]] to i32 +// SIMD-ONLY0-NEXT: [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]] +// SIMD-ONLY0: cond.true252: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = sext i16 [[TMP117]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256:%.*]] +// SIMD-ONLY0: cond.false254: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV255:%.*]] = sext i16 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256]] +// SIMD-ONLY0: cond.end256: +// SIMD-ONLY0-NEXT: [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ] +// SIMD-ONLY0-NEXT: [[CONV258:%.*]] = trunc i32 [[COND257]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV258]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP119]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP120]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = sext i16 [[TMP121]] to i32 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = sext i16 [[TMP122]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]] +// SIMD-ONLY0: cond.true263: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV264:%.*]] = sext i16 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267:%.*]] +// SIMD-ONLY0: cond.false265: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i16, ptr [[SX]], align 2 
+// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = sext i16 [[TMP124]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267]] +// SIMD-ONLY0: cond.end267: +// SIMD-ONLY0-NEXT: [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ] +// SIMD-ONLY0-NEXT: [[CONV269:%.*]] = trunc i32 [[COND268]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV269]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP125]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV270:%.*]] = sext i16 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV271:%.*]] = sext i16 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]] +// SIMD-ONLY0: cond.true274: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV275:%.*]] = sext i16 [[TMP128]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278:%.*]] +// SIMD-ONLY0: cond.false276: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = sext i16 [[TMP129]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278]] +// SIMD-ONLY0: cond.end278: +// SIMD-ONLY0-NEXT: [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ] +// SIMD-ONLY0-NEXT: [[CONV280:%.*]] = trunc i32 [[COND279]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV280]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP130]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV281:%.*]] = sext i16 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i16, ptr [[SE]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV282:%.*]] = sext i16 [[TMP132]] to i32 +// SIMD-ONLY0-NEXT: [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]] +// SIMD-ONLY0: cond.true285: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV286:%.*]] = sext i16 [[TMP133]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289:%.*]] +// SIMD-ONLY0: cond.false287: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV288:%.*]] = sext i16 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289]] +// SIMD-ONLY0: cond.end289: +// SIMD-ONLY0-NEXT: [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ] +// SIMD-ONLY0-NEXT: [[CONV291:%.*]] = trunc i32 [[COND290]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV291]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV292:%.*]] = sext i16 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV293:%.*]] = sext i16 [[TMP136]] to i32 +// SIMD-ONLY0-NEXT: [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]] +// SIMD-ONLY0: cond.true296: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV297:%.*]] = sext i16 [[TMP137]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300:%.*]] +// SIMD-ONLY0: cond.false298: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV299:%.*]] = sext i16 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300]] +// SIMD-ONLY0: cond.end300: +// SIMD-ONLY0-NEXT: [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ] +// SIMD-ONLY0-NEXT: [[CONV302:%.*]] = trunc i32 
[[COND301]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV302]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP139]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV303:%.*]] = sext i16 [[TMP140]] to i32 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV304:%.*]] = sext i16 [[TMP141]] to i32 +// SIMD-ONLY0-NEXT: [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]] +// SIMD-ONLY0-NEXT: br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]] +// SIMD-ONLY0: cond.true307: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV308:%.*]] = sext i16 [[TMP142]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311:%.*]] +// SIMD-ONLY0: cond.false309: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV310:%.*]] = sext i16 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311]] +// SIMD-ONLY0: cond.end311: +// SIMD-ONLY0-NEXT: [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ] +// SIMD-ONLY0-NEXT: [[CONV313:%.*]] = trunc i32 [[COND312]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV313]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP144]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV314:%.*]] = sext i16 [[TMP145]] to i32 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = sext i16 [[TMP146]] to i32 +// SIMD-ONLY0-NEXT: [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]] +// SIMD-ONLY0: cond.true318: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load 
i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV319:%.*]] = sext i16 [[TMP147]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322:%.*]] +// SIMD-ONLY0: cond.false320: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = sext i16 [[TMP148]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322]] +// SIMD-ONLY0: cond.end322: +// SIMD-ONLY0-NEXT: [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ] +// SIMD-ONLY0-NEXT: [[CONV324:%.*]] = trunc i32 [[COND323]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV324]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP149]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP150]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV325:%.*]] = sext i16 [[TMP151]] to i32 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV326:%.*]] = sext i16 [[TMP152]] to i32 +// SIMD-ONLY0-NEXT: [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]] +// SIMD-ONLY0-NEXT: br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]] +// SIMD-ONLY0: cond.true329: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV330:%.*]] = sext i16 [[TMP153]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333:%.*]] +// SIMD-ONLY0: cond.false331: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = sext i16 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333]] +// SIMD-ONLY0: cond.end333: +// SIMD-ONLY0-NEXT: [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ] +// SIMD-ONLY0-NEXT: [[CONV335:%.*]] = trunc i32 [[COND334]] to i16 +// SIMD-ONLY0-NEXT: store 
i16 [[CONV335]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP155]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV336:%.*]] = sext i16 [[TMP156]] to i32 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = sext i16 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]] +// SIMD-ONLY0-NEXT: br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]] +// SIMD-ONLY0: cond.true340: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV341:%.*]] = sext i16 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344:%.*]] +// SIMD-ONLY0: cond.false342: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV343:%.*]] = sext i16 [[TMP159]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344]] +// SIMD-ONLY0: cond.end344: +// SIMD-ONLY0-NEXT: [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ] +// SIMD-ONLY0-NEXT: [[CONV346:%.*]] = trunc i32 [[COND345]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV346]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP160]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV347:%.*]] = sext i16 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV348:%.*]] = sext i16 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]] +// SIMD-ONLY0-NEXT: br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]] +// SIMD-ONLY0: cond.true351: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV352:%.*]] = sext i16 [[TMP163]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355:%.*]] +// SIMD-ONLY0: cond.false353: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV354:%.*]] = sext i16 [[TMP164]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355]] +// SIMD-ONLY0: cond.end355: +// SIMD-ONLY0-NEXT: [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ] +// SIMD-ONLY0-NEXT: [[CONV357:%.*]] = trunc i32 [[COND356]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV357]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV358:%.*]] = sext i16 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV359:%.*]] = sext i16 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]] +// SIMD-ONLY0-NEXT: br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]] +// SIMD-ONLY0: cond.true362: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV363:%.*]] = sext i16 [[TMP167]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366:%.*]] +// SIMD-ONLY0: cond.false364: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV365:%.*]] = sext i16 [[TMP168]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366]] +// SIMD-ONLY0: cond.end366: +// SIMD-ONLY0-NEXT: [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ] +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = trunc i32 [[COND367]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV368]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP169]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV369:%.*]] = sext i16 [[TMP170]] to i32 +// 
SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV370:%.*]] = sext i16 [[TMP171]] to i32 +// SIMD-ONLY0-NEXT: [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]] +// SIMD-ONLY0: cond.true373: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = sext i16 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377:%.*]] +// SIMD-ONLY0: cond.false375: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV376:%.*]] = sext i16 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377]] +// SIMD-ONLY0: cond.end377: +// SIMD-ONLY0-NEXT: [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ] +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = trunc i32 [[COND378]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV379]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP174]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV380:%.*]] = sext i16 [[TMP175]] to i32 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i16, ptr [[SE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV381:%.*]] = sext i16 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]] +// SIMD-ONLY0-NEXT: br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]] +// SIMD-ONLY0: cond.true384: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i16, ptr [[SD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = sext i16 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388:%.*]] +// SIMD-ONLY0: cond.false386: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV387:%.*]] = sext i16 [[TMP178]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388]] 
+// SIMD-ONLY0: cond.end388: +// SIMD-ONLY0-NEXT: [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ] +// SIMD-ONLY0-NEXT: [[CONV390:%.*]] = trunc i32 [[COND389]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV390]], ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i16, ptr [[SX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP179]], ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i16, ptr [[SV]], align 2 +// SIMD-ONLY0-NEXT: ret i16 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @usxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[USX:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USV:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USE:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[USD:%.*]] = alloca i16, align 2 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP0]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = zext i16 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = zext i16 [[TMP2]] to i32 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV3:%.*]] = zext i16 [[TMP3]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = zext i16 [[TMP4]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[CONV4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: [[CONV5:%.*]] = trunc i32 [[COND]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV5]], ptr [[USX]], 
align 2 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP5]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV6:%.*]] = zext i16 [[TMP6]] to i32 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV7:%.*]] = zext i16 [[TMP7]] to i32 +// SIMD-ONLY0-NEXT: [[CMP8:%.*]] = icmp slt i32 [[CONV6]], [[CONV7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP8]], label [[COND_TRUE10:%.*]], label [[COND_FALSE12:%.*]] +// SIMD-ONLY0: cond.true10: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = zext i16 [[TMP8]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false12: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV13:%.*]] = zext i16 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[CONV11]], [[COND_TRUE10]] ], [ [[CONV13]], [[COND_FALSE12]] ] +// SIMD-ONLY0-NEXT: [[CONV16:%.*]] = trunc i32 [[COND15]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV16]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP10]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV17:%.*]] = zext i16 [[TMP11]] to i32 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV18:%.*]] = zext i16 [[TMP12]] to i32 +// SIMD-ONLY0-NEXT: [[CMP19:%.*]] = icmp eq i32 [[CONV17]], [[CONV18]] +// SIMD-ONLY0-NEXT: br i1 [[CMP19]], label [[COND_TRUE21:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true21: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV22:%.*]] = zext i16 [[TMP13]] to i32 +// SIMD-ONLY0-NEXT: br label 
[[COND_END25:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV24:%.*]] = zext i16 [[TMP14]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END25]] +// SIMD-ONLY0: cond.end25: +// SIMD-ONLY0-NEXT: [[COND26:%.*]] = phi i32 [ [[CONV22]], [[COND_TRUE21]] ], [ [[CONV24]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: [[CONV27:%.*]] = trunc i32 [[COND26]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV27]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV28:%.*]] = zext i16 [[TMP15]] to i32 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV29:%.*]] = zext i16 [[TMP16]] to i32 +// SIMD-ONLY0-NEXT: [[CMP30:%.*]] = icmp sgt i32 [[CONV28]], [[CONV29]] +// SIMD-ONLY0-NEXT: br i1 [[CMP30]], label [[COND_TRUE32:%.*]], label [[COND_FALSE34:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV33:%.*]] = zext i16 [[TMP17]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36:%.*]] +// SIMD-ONLY0: cond.false34: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV35:%.*]] = zext i16 [[TMP18]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END36]] +// SIMD-ONLY0: cond.end36: +// SIMD-ONLY0-NEXT: [[COND37:%.*]] = phi i32 [ [[CONV33]], [[COND_TRUE32]] ], [ [[CONV35]], [[COND_FALSE34]] ] +// SIMD-ONLY0-NEXT: [[CONV38:%.*]] = trunc i32 [[COND37]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV38]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP19]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV39:%.*]] = zext i16 [[TMP20]] to i32 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV40:%.*]] = zext i16 [[TMP21]] 
to i32 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp slt i32 [[CONV39]], [[CONV40]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE43:%.*]], label [[COND_FALSE45:%.*]] +// SIMD-ONLY0: cond.true43: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV44:%.*]] = zext i16 [[TMP22]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47:%.*]] +// SIMD-ONLY0: cond.false45: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV46:%.*]] = zext i16 [[TMP23]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END47]] +// SIMD-ONLY0: cond.end47: +// SIMD-ONLY0-NEXT: [[COND48:%.*]] = phi i32 [ [[CONV44]], [[COND_TRUE43]] ], [ [[CONV46]], [[COND_FALSE45]] ] +// SIMD-ONLY0-NEXT: [[CONV49:%.*]] = trunc i32 [[COND48]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV49]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP24]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV50:%.*]] = zext i16 [[TMP25]] to i32 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV51:%.*]] = zext i16 [[TMP26]] to i32 +// SIMD-ONLY0-NEXT: [[CMP52:%.*]] = icmp eq i32 [[CONV50]], [[CONV51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP52]], label [[COND_TRUE54:%.*]], label [[COND_FALSE56:%.*]] +// SIMD-ONLY0: cond.true54: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV55:%.*]] = zext i16 [[TMP27]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58:%.*]] +// SIMD-ONLY0: cond.false56: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV57:%.*]] = zext i16 [[TMP28]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END58]] +// SIMD-ONLY0: cond.end58: +// SIMD-ONLY0-NEXT: [[COND59:%.*]] = phi i32 [ [[CONV55]], [[COND_TRUE54]] ], [ [[CONV57]], [[COND_FALSE56]] ] +// SIMD-ONLY0-NEXT: 
[[CONV60:%.*]] = trunc i32 [[COND59]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV60]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP29]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP30]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV61:%.*]] = zext i16 [[TMP31]] to i32 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV62:%.*]] = zext i16 [[TMP32]] to i32 +// SIMD-ONLY0-NEXT: [[CMP63:%.*]] = icmp sgt i32 [[CONV61]], [[CONV62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP63]], label [[COND_TRUE65:%.*]], label [[COND_FALSE67:%.*]] +// SIMD-ONLY0: cond.true65: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV66:%.*]] = zext i16 [[TMP33]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false67: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV68:%.*]] = zext i16 [[TMP34]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[CONV66]], [[COND_TRUE65]] ], [ [[CONV68]], [[COND_FALSE67]] ] +// SIMD-ONLY0-NEXT: [[CONV71:%.*]] = trunc i32 [[COND70]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV71]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP35]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV72:%.*]] = zext i16 [[TMP36]] to i32 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV73:%.*]] = zext i16 [[TMP37]] to i32 +// SIMD-ONLY0-NEXT: [[CMP74:%.*]] = icmp slt i32 [[CONV72]], [[CONV73]] +// SIMD-ONLY0-NEXT: br i1 [[CMP74]], label 
[[COND_TRUE76:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true76: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV77:%.*]] = zext i16 [[TMP38]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV79:%.*]] = zext i16 [[TMP39]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END80]] +// SIMD-ONLY0: cond.end80: +// SIMD-ONLY0-NEXT: [[COND81:%.*]] = phi i32 [ [[CONV77]], [[COND_TRUE76]] ], [ [[CONV79]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: [[CONV82:%.*]] = trunc i32 [[COND81]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV82]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP40]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV83:%.*]] = zext i16 [[TMP41]] to i32 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV84:%.*]] = zext i16 [[TMP42]] to i32 +// SIMD-ONLY0-NEXT: [[CMP85:%.*]] = icmp eq i32 [[CONV83]], [[CONV84]] +// SIMD-ONLY0-NEXT: br i1 [[CMP85]], label [[COND_TRUE87:%.*]], label [[COND_FALSE89:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV88:%.*]] = zext i16 [[TMP43]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91:%.*]] +// SIMD-ONLY0: cond.false89: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV90:%.*]] = zext i16 [[TMP44]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END91]] +// SIMD-ONLY0: cond.end91: +// SIMD-ONLY0-NEXT: [[COND92:%.*]] = phi i32 [ [[CONV88]], [[COND_TRUE87]] ], [ [[CONV90]], [[COND_FALSE89]] ] +// SIMD-ONLY0-NEXT: [[CONV93:%.*]] = trunc i32 [[COND92]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV93]], ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV94:%.*]] = zext i16 [[TMP45]] to i32 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV95:%.*]] = zext i16 [[TMP46]] to i32 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp sgt i32 [[CONV94]], [[CONV95]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE98:%.*]], label [[COND_FALSE100:%.*]] +// SIMD-ONLY0: cond.true98: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV99:%.*]] = zext i16 [[TMP47]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102:%.*]] +// SIMD-ONLY0: cond.false100: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV101:%.*]] = zext i16 [[TMP48]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END102]] +// SIMD-ONLY0: cond.end102: +// SIMD-ONLY0-NEXT: [[COND103:%.*]] = phi i32 [ [[CONV99]], [[COND_TRUE98]] ], [ [[CONV101]], [[COND_FALSE100]] ] +// SIMD-ONLY0-NEXT: [[CONV104:%.*]] = trunc i32 [[COND103]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV104]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP49]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV105:%.*]] = zext i16 [[TMP50]] to i32 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV106:%.*]] = zext i16 [[TMP51]] to i32 +// SIMD-ONLY0-NEXT: [[CMP107:%.*]] = icmp slt i32 [[CONV105]], [[CONV106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP107]], label [[COND_TRUE109:%.*]], label [[COND_FALSE111:%.*]] +// SIMD-ONLY0: cond.true109: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV110:%.*]] = zext i16 [[TMP52]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113:%.*]] +// SIMD-ONLY0: cond.false111: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i16, ptr [[USX]], 
align 2 +// SIMD-ONLY0-NEXT: [[CONV112:%.*]] = zext i16 [[TMP53]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END113]] +// SIMD-ONLY0: cond.end113: +// SIMD-ONLY0-NEXT: [[COND114:%.*]] = phi i32 [ [[CONV110]], [[COND_TRUE109]] ], [ [[CONV112]], [[COND_FALSE111]] ] +// SIMD-ONLY0-NEXT: [[CONV115:%.*]] = trunc i32 [[COND114]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV115]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP54]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV116:%.*]] = zext i16 [[TMP55]] to i32 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV117:%.*]] = zext i16 [[TMP56]] to i32 +// SIMD-ONLY0-NEXT: [[CMP118:%.*]] = icmp eq i32 [[CONV116]], [[CONV117]] +// SIMD-ONLY0-NEXT: br i1 [[CMP118]], label [[COND_TRUE120:%.*]], label [[COND_FALSE122:%.*]] +// SIMD-ONLY0: cond.true120: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV121:%.*]] = zext i16 [[TMP57]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false122: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV123:%.*]] = zext i16 [[TMP58]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[CONV121]], [[COND_TRUE120]] ], [ [[CONV123]], [[COND_FALSE122]] ] +// SIMD-ONLY0-NEXT: [[CONV126:%.*]] = trunc i32 [[COND125]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV126]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP59]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP60]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i16, ptr [[USX]], align 2 
+// SIMD-ONLY0-NEXT: [[CONV127:%.*]] = zext i16 [[TMP61]] to i32 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV128:%.*]] = zext i16 [[TMP62]] to i32 +// SIMD-ONLY0-NEXT: [[CMP129:%.*]] = icmp sgt i32 [[CONV127]], [[CONV128]] +// SIMD-ONLY0-NEXT: br i1 [[CMP129]], label [[COND_TRUE131:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true131: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV132:%.*]] = zext i16 [[TMP63]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV134:%.*]] = zext i16 [[TMP64]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END135]] +// SIMD-ONLY0: cond.end135: +// SIMD-ONLY0-NEXT: [[COND136:%.*]] = phi i32 [ [[CONV132]], [[COND_TRUE131]] ], [ [[CONV134]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: [[CONV137:%.*]] = trunc i32 [[COND136]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV137]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP65]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV138:%.*]] = zext i16 [[TMP66]] to i32 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV139:%.*]] = zext i16 [[TMP67]] to i32 +// SIMD-ONLY0-NEXT: [[CMP140:%.*]] = icmp slt i32 [[CONV138]], [[CONV139]] +// SIMD-ONLY0-NEXT: br i1 [[CMP140]], label [[COND_TRUE142:%.*]], label [[COND_FALSE144:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV143:%.*]] = zext i16 [[TMP68]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146:%.*]] +// SIMD-ONLY0: cond.false144: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV145:%.*]] = zext i16 
[[TMP69]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END146]] +// SIMD-ONLY0: cond.end146: +// SIMD-ONLY0-NEXT: [[COND147:%.*]] = phi i32 [ [[CONV143]], [[COND_TRUE142]] ], [ [[CONV145]], [[COND_FALSE144]] ] +// SIMD-ONLY0-NEXT: [[CONV148:%.*]] = trunc i32 [[COND147]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV148]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP70]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV149:%.*]] = zext i16 [[TMP71]] to i32 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV150:%.*]] = zext i16 [[TMP72]] to i32 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp eq i32 [[CONV149]], [[CONV150]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE153:%.*]], label [[COND_FALSE155:%.*]] +// SIMD-ONLY0: cond.true153: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV154:%.*]] = zext i16 [[TMP73]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157:%.*]] +// SIMD-ONLY0: cond.false155: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV156:%.*]] = zext i16 [[TMP74]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END157]] +// SIMD-ONLY0: cond.end157: +// SIMD-ONLY0-NEXT: [[COND158:%.*]] = phi i32 [ [[CONV154]], [[COND_TRUE153]] ], [ [[CONV156]], [[COND_FALSE155]] ] +// SIMD-ONLY0-NEXT: [[CONV159:%.*]] = trunc i32 [[COND158]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV159]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV160:%.*]] = zext i16 [[TMP75]] to i32 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV161:%.*]] = zext i16 [[TMP76]] to i32 +// SIMD-ONLY0-NEXT: [[CMP162:%.*]] = icmp sgt i32 [[CONV160]], [[CONV161]] +// SIMD-ONLY0-NEXT: br i1 [[CMP162]], label 
[[COND_TRUE164:%.*]], label [[COND_FALSE166:%.*]] +// SIMD-ONLY0: cond.true164: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV165:%.*]] = zext i16 [[TMP77]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168:%.*]] +// SIMD-ONLY0: cond.false166: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV167:%.*]] = zext i16 [[TMP78]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END168]] +// SIMD-ONLY0: cond.end168: +// SIMD-ONLY0-NEXT: [[COND169:%.*]] = phi i32 [ [[CONV165]], [[COND_TRUE164]] ], [ [[CONV167]], [[COND_FALSE166]] ] +// SIMD-ONLY0-NEXT: [[CONV170:%.*]] = trunc i32 [[COND169]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV170]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP79]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV171:%.*]] = zext i16 [[TMP80]] to i32 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV172:%.*]] = zext i16 [[TMP81]] to i32 +// SIMD-ONLY0-NEXT: [[CMP173:%.*]] = icmp slt i32 [[CONV171]], [[CONV172]] +// SIMD-ONLY0-NEXT: br i1 [[CMP173]], label [[COND_TRUE175:%.*]], label [[COND_FALSE177:%.*]] +// SIMD-ONLY0: cond.true175: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV176:%.*]] = zext i16 [[TMP82]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179:%.*]] +// SIMD-ONLY0: cond.false177: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV178:%.*]] = zext i16 [[TMP83]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END179]] +// SIMD-ONLY0: cond.end179: +// SIMD-ONLY0-NEXT: [[COND180:%.*]] = phi i32 [ [[CONV176]], [[COND_TRUE175]] ], [ [[CONV178]], [[COND_FALSE177]] ] +// SIMD-ONLY0-NEXT: [[CONV181:%.*]] = trunc i32 [[COND180]] to i16 +// SIMD-ONLY0-NEXT: store i16 
[[CONV181]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP84]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV182:%.*]] = zext i16 [[TMP85]] to i32 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV183:%.*]] = zext i16 [[TMP86]] to i32 +// SIMD-ONLY0-NEXT: [[CMP184:%.*]] = icmp eq i32 [[CONV182]], [[CONV183]] +// SIMD-ONLY0-NEXT: br i1 [[CMP184]], label [[COND_TRUE186:%.*]], label [[COND_FALSE188:%.*]] +// SIMD-ONLY0: cond.true186: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV187:%.*]] = zext i16 [[TMP87]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190:%.*]] +// SIMD-ONLY0: cond.false188: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV189:%.*]] = zext i16 [[TMP88]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END190]] +// SIMD-ONLY0: cond.end190: +// SIMD-ONLY0-NEXT: [[COND191:%.*]] = phi i32 [ [[CONV187]], [[COND_TRUE186]] ], [ [[CONV189]], [[COND_FALSE188]] ] +// SIMD-ONLY0-NEXT: [[CONV192:%.*]] = trunc i32 [[COND191]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV192]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP89]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP90]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV193:%.*]] = zext i16 [[TMP91]] to i32 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV194:%.*]] = zext i16 [[TMP92]] to i32 +// SIMD-ONLY0-NEXT: [[CMP195:%.*]] = icmp sgt i32 [[CONV193]], [[CONV194]] +// SIMD-ONLY0-NEXT: br i1 [[CMP195]], label [[COND_TRUE197:%.*]], label 
[[COND_FALSE199:%.*]] +// SIMD-ONLY0: cond.true197: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV198:%.*]] = zext i16 [[TMP93]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201:%.*]] +// SIMD-ONLY0: cond.false199: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV200:%.*]] = zext i16 [[TMP94]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END201]] +// SIMD-ONLY0: cond.end201: +// SIMD-ONLY0-NEXT: [[COND202:%.*]] = phi i32 [ [[CONV198]], [[COND_TRUE197]] ], [ [[CONV200]], [[COND_FALSE199]] ] +// SIMD-ONLY0-NEXT: [[CONV203:%.*]] = trunc i32 [[COND202]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV203]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP95]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV204:%.*]] = zext i16 [[TMP96]] to i32 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV205:%.*]] = zext i16 [[TMP97]] to i32 +// SIMD-ONLY0-NEXT: [[CMP206:%.*]] = icmp slt i32 [[CONV204]], [[CONV205]] +// SIMD-ONLY0-NEXT: br i1 [[CMP206]], label [[COND_TRUE208:%.*]], label [[COND_FALSE210:%.*]] +// SIMD-ONLY0: cond.true208: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV209:%.*]] = zext i16 [[TMP98]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212:%.*]] +// SIMD-ONLY0: cond.false210: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV211:%.*]] = zext i16 [[TMP99]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END212]] +// SIMD-ONLY0: cond.end212: +// SIMD-ONLY0-NEXT: [[COND213:%.*]] = phi i32 [ [[CONV209]], [[COND_TRUE208]] ], [ [[CONV211]], [[COND_FALSE210]] ] +// SIMD-ONLY0-NEXT: [[CONV214:%.*]] = trunc i32 [[COND213]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV214]], ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP100]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV215:%.*]] = zext i16 [[TMP101]] to i32 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV216:%.*]] = zext i16 [[TMP102]] to i32 +// SIMD-ONLY0-NEXT: [[CMP217:%.*]] = icmp eq i32 [[CONV215]], [[CONV216]] +// SIMD-ONLY0-NEXT: br i1 [[CMP217]], label [[COND_TRUE219:%.*]], label [[COND_FALSE221:%.*]] +// SIMD-ONLY0: cond.true219: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV220:%.*]] = zext i16 [[TMP103]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223:%.*]] +// SIMD-ONLY0: cond.false221: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV222:%.*]] = zext i16 [[TMP104]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END223]] +// SIMD-ONLY0: cond.end223: +// SIMD-ONLY0-NEXT: [[COND224:%.*]] = phi i32 [ [[CONV220]], [[COND_TRUE219]] ], [ [[CONV222]], [[COND_FALSE221]] ] +// SIMD-ONLY0-NEXT: [[CONV225:%.*]] = trunc i32 [[COND224]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV225]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV226:%.*]] = zext i16 [[TMP105]] to i32 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV227:%.*]] = zext i16 [[TMP106]] to i32 +// SIMD-ONLY0-NEXT: [[CMP228:%.*]] = icmp sgt i32 [[CONV226]], [[CONV227]] +// SIMD-ONLY0-NEXT: br i1 [[CMP228]], label [[COND_TRUE230:%.*]], label [[COND_FALSE232:%.*]] +// SIMD-ONLY0: cond.true230: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV231:%.*]] = zext i16 [[TMP107]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234:%.*]] +// SIMD-ONLY0: cond.false232: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = 
load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV233:%.*]] = zext i16 [[TMP108]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END234]] +// SIMD-ONLY0: cond.end234: +// SIMD-ONLY0-NEXT: [[COND235:%.*]] = phi i32 [ [[CONV231]], [[COND_TRUE230]] ], [ [[CONV233]], [[COND_FALSE232]] ] +// SIMD-ONLY0-NEXT: [[CONV236:%.*]] = trunc i32 [[COND235]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV236]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP109]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV237:%.*]] = zext i16 [[TMP110]] to i32 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV238:%.*]] = zext i16 [[TMP111]] to i32 +// SIMD-ONLY0-NEXT: [[CMP239:%.*]] = icmp slt i32 [[CONV237]], [[CONV238]] +// SIMD-ONLY0-NEXT: br i1 [[CMP239]], label [[COND_TRUE241:%.*]], label [[COND_FALSE243:%.*]] +// SIMD-ONLY0: cond.true241: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV242:%.*]] = zext i16 [[TMP112]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245:%.*]] +// SIMD-ONLY0: cond.false243: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV244:%.*]] = zext i16 [[TMP113]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END245]] +// SIMD-ONLY0: cond.end245: +// SIMD-ONLY0-NEXT: [[COND246:%.*]] = phi i32 [ [[CONV242]], [[COND_TRUE241]] ], [ [[CONV244]], [[COND_FALSE243]] ] +// SIMD-ONLY0-NEXT: [[CONV247:%.*]] = trunc i32 [[COND246]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV247]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP114]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV248:%.*]] = zext i16 [[TMP115]] to i32 +// SIMD-ONLY0-NEXT: 
[[TMP116:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV249:%.*]] = zext i16 [[TMP116]] to i32 +// SIMD-ONLY0-NEXT: [[CMP250:%.*]] = icmp eq i32 [[CONV248]], [[CONV249]] +// SIMD-ONLY0-NEXT: br i1 [[CMP250]], label [[COND_TRUE252:%.*]], label [[COND_FALSE254:%.*]] +// SIMD-ONLY0: cond.true252: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV253:%.*]] = zext i16 [[TMP117]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256:%.*]] +// SIMD-ONLY0: cond.false254: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV255:%.*]] = zext i16 [[TMP118]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END256]] +// SIMD-ONLY0: cond.end256: +// SIMD-ONLY0-NEXT: [[COND257:%.*]] = phi i32 [ [[CONV253]], [[COND_TRUE252]] ], [ [[CONV255]], [[COND_FALSE254]] ] +// SIMD-ONLY0-NEXT: [[CONV258:%.*]] = trunc i32 [[COND257]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV258]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP119]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP120]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV259:%.*]] = zext i16 [[TMP121]] to i32 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV260:%.*]] = zext i16 [[TMP122]] to i32 +// SIMD-ONLY0-NEXT: [[CMP261:%.*]] = icmp sgt i32 [[CONV259]], [[CONV260]] +// SIMD-ONLY0-NEXT: br i1 [[CMP261]], label [[COND_TRUE263:%.*]], label [[COND_FALSE265:%.*]] +// SIMD-ONLY0: cond.true263: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV264:%.*]] = zext i16 [[TMP123]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267:%.*]] +// SIMD-ONLY0: cond.false265: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i16, ptr 
[[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV266:%.*]] = zext i16 [[TMP124]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END267]] +// SIMD-ONLY0: cond.end267: +// SIMD-ONLY0-NEXT: [[COND268:%.*]] = phi i32 [ [[CONV264]], [[COND_TRUE263]] ], [ [[CONV266]], [[COND_FALSE265]] ] +// SIMD-ONLY0-NEXT: [[CONV269:%.*]] = trunc i32 [[COND268]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV269]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP125]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV270:%.*]] = zext i16 [[TMP126]] to i32 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV271:%.*]] = zext i16 [[TMP127]] to i32 +// SIMD-ONLY0-NEXT: [[CMP272:%.*]] = icmp slt i32 [[CONV270]], [[CONV271]] +// SIMD-ONLY0-NEXT: br i1 [[CMP272]], label [[COND_TRUE274:%.*]], label [[COND_FALSE276:%.*]] +// SIMD-ONLY0: cond.true274: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV275:%.*]] = zext i16 [[TMP128]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278:%.*]] +// SIMD-ONLY0: cond.false276: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV277:%.*]] = zext i16 [[TMP129]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END278]] +// SIMD-ONLY0: cond.end278: +// SIMD-ONLY0-NEXT: [[COND279:%.*]] = phi i32 [ [[CONV275]], [[COND_TRUE274]] ], [ [[CONV277]], [[COND_FALSE276]] ] +// SIMD-ONLY0-NEXT: [[CONV280:%.*]] = trunc i32 [[COND279]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV280]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP130]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV281:%.*]] = zext i16 [[TMP131]] to i32 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load 
i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV282:%.*]] = zext i16 [[TMP132]] to i32 +// SIMD-ONLY0-NEXT: [[CMP283:%.*]] = icmp eq i32 [[CONV281]], [[CONV282]] +// SIMD-ONLY0-NEXT: br i1 [[CMP283]], label [[COND_TRUE285:%.*]], label [[COND_FALSE287:%.*]] +// SIMD-ONLY0: cond.true285: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV286:%.*]] = zext i16 [[TMP133]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289:%.*]] +// SIMD-ONLY0: cond.false287: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV288:%.*]] = zext i16 [[TMP134]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END289]] +// SIMD-ONLY0: cond.end289: +// SIMD-ONLY0-NEXT: [[COND290:%.*]] = phi i32 [ [[CONV286]], [[COND_TRUE285]] ], [ [[CONV288]], [[COND_FALSE287]] ] +// SIMD-ONLY0-NEXT: [[CONV291:%.*]] = trunc i32 [[COND290]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV291]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV292:%.*]] = zext i16 [[TMP135]] to i32 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV293:%.*]] = zext i16 [[TMP136]] to i32 +// SIMD-ONLY0-NEXT: [[CMP294:%.*]] = icmp sgt i32 [[CONV292]], [[CONV293]] +// SIMD-ONLY0-NEXT: br i1 [[CMP294]], label [[COND_TRUE296:%.*]], label [[COND_FALSE298:%.*]] +// SIMD-ONLY0: cond.true296: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV297:%.*]] = zext i16 [[TMP137]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300:%.*]] +// SIMD-ONLY0: cond.false298: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV299:%.*]] = zext i16 [[TMP138]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END300]] +// SIMD-ONLY0: cond.end300: +// SIMD-ONLY0-NEXT: [[COND301:%.*]] = phi i32 [ [[CONV297]], [[COND_TRUE296]] ], [ [[CONV299]], [[COND_FALSE298]] ] +// 
SIMD-ONLY0-NEXT: [[CONV302:%.*]] = trunc i32 [[COND301]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV302]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP139]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV303:%.*]] = zext i16 [[TMP140]] to i32 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV304:%.*]] = zext i16 [[TMP141]] to i32 +// SIMD-ONLY0-NEXT: [[CMP305:%.*]] = icmp slt i32 [[CONV303]], [[CONV304]] +// SIMD-ONLY0-NEXT: br i1 [[CMP305]], label [[COND_TRUE307:%.*]], label [[COND_FALSE309:%.*]] +// SIMD-ONLY0: cond.true307: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV308:%.*]] = zext i16 [[TMP142]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311:%.*]] +// SIMD-ONLY0: cond.false309: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV310:%.*]] = zext i16 [[TMP143]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END311]] +// SIMD-ONLY0: cond.end311: +// SIMD-ONLY0-NEXT: [[COND312:%.*]] = phi i32 [ [[CONV308]], [[COND_TRUE307]] ], [ [[CONV310]], [[COND_FALSE309]] ] +// SIMD-ONLY0-NEXT: [[CONV313:%.*]] = trunc i32 [[COND312]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV313]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP144]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV314:%.*]] = zext i16 [[TMP145]] to i32 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV315:%.*]] = zext i16 [[TMP146]] to i32 +// SIMD-ONLY0-NEXT: [[CMP316:%.*]] = icmp eq i32 [[CONV314]], [[CONV315]] +// SIMD-ONLY0-NEXT: br i1 [[CMP316]], label [[COND_TRUE318:%.*]], label [[COND_FALSE320:%.*]] +// SIMD-ONLY0: 
cond.true318: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV319:%.*]] = zext i16 [[TMP147]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322:%.*]] +// SIMD-ONLY0: cond.false320: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV321:%.*]] = zext i16 [[TMP148]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END322]] +// SIMD-ONLY0: cond.end322: +// SIMD-ONLY0-NEXT: [[COND323:%.*]] = phi i32 [ [[CONV319]], [[COND_TRUE318]] ], [ [[CONV321]], [[COND_FALSE320]] ] +// SIMD-ONLY0-NEXT: [[CONV324:%.*]] = trunc i32 [[COND323]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV324]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP149]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP150]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV325:%.*]] = zext i16 [[TMP151]] to i32 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV326:%.*]] = zext i16 [[TMP152]] to i32 +// SIMD-ONLY0-NEXT: [[CMP327:%.*]] = icmp sgt i32 [[CONV325]], [[CONV326]] +// SIMD-ONLY0-NEXT: br i1 [[CMP327]], label [[COND_TRUE329:%.*]], label [[COND_FALSE331:%.*]] +// SIMD-ONLY0: cond.true329: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV330:%.*]] = zext i16 [[TMP153]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333:%.*]] +// SIMD-ONLY0: cond.false331: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV332:%.*]] = zext i16 [[TMP154]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END333]] +// SIMD-ONLY0: cond.end333: +// SIMD-ONLY0-NEXT: [[COND334:%.*]] = phi i32 [ [[CONV330]], [[COND_TRUE329]] ], [ [[CONV332]], [[COND_FALSE331]] ] +// SIMD-ONLY0-NEXT: 
[[CONV335:%.*]] = trunc i32 [[COND334]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV335]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP155]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV336:%.*]] = zext i16 [[TMP156]] to i32 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV337:%.*]] = zext i16 [[TMP157]] to i32 +// SIMD-ONLY0-NEXT: [[CMP338:%.*]] = icmp slt i32 [[CONV336]], [[CONV337]] +// SIMD-ONLY0-NEXT: br i1 [[CMP338]], label [[COND_TRUE340:%.*]], label [[COND_FALSE342:%.*]] +// SIMD-ONLY0: cond.true340: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV341:%.*]] = zext i16 [[TMP158]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344:%.*]] +// SIMD-ONLY0: cond.false342: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV343:%.*]] = zext i16 [[TMP159]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END344]] +// SIMD-ONLY0: cond.end344: +// SIMD-ONLY0-NEXT: [[COND345:%.*]] = phi i32 [ [[CONV341]], [[COND_TRUE340]] ], [ [[CONV343]], [[COND_FALSE342]] ] +// SIMD-ONLY0-NEXT: [[CONV346:%.*]] = trunc i32 [[COND345]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV346]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP160]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV347:%.*]] = zext i16 [[TMP161]] to i32 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV348:%.*]] = zext i16 [[TMP162]] to i32 +// SIMD-ONLY0-NEXT: [[CMP349:%.*]] = icmp eq i32 [[CONV347]], [[CONV348]] +// SIMD-ONLY0-NEXT: br i1 [[CMP349]], label [[COND_TRUE351:%.*]], label [[COND_FALSE353:%.*]] +// SIMD-ONLY0: cond.true351: +// 
SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV352:%.*]] = zext i16 [[TMP163]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355:%.*]] +// SIMD-ONLY0: cond.false353: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV354:%.*]] = zext i16 [[TMP164]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END355]] +// SIMD-ONLY0: cond.end355: +// SIMD-ONLY0-NEXT: [[COND356:%.*]] = phi i32 [ [[CONV352]], [[COND_TRUE351]] ], [ [[CONV354]], [[COND_FALSE353]] ] +// SIMD-ONLY0-NEXT: [[CONV357:%.*]] = trunc i32 [[COND356]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV357]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV358:%.*]] = zext i16 [[TMP165]] to i32 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV359:%.*]] = zext i16 [[TMP166]] to i32 +// SIMD-ONLY0-NEXT: [[CMP360:%.*]] = icmp sgt i32 [[CONV358]], [[CONV359]] +// SIMD-ONLY0-NEXT: br i1 [[CMP360]], label [[COND_TRUE362:%.*]], label [[COND_FALSE364:%.*]] +// SIMD-ONLY0: cond.true362: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV363:%.*]] = zext i16 [[TMP167]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366:%.*]] +// SIMD-ONLY0: cond.false364: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV365:%.*]] = zext i16 [[TMP168]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END366]] +// SIMD-ONLY0: cond.end366: +// SIMD-ONLY0-NEXT: [[COND367:%.*]] = phi i32 [ [[CONV363]], [[COND_TRUE362]] ], [ [[CONV365]], [[COND_FALSE364]] ] +// SIMD-ONLY0-NEXT: [[CONV368:%.*]] = trunc i32 [[COND367]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV368]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP169]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i16, 
ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV369:%.*]] = zext i16 [[TMP170]] to i32 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV370:%.*]] = zext i16 [[TMP171]] to i32 +// SIMD-ONLY0-NEXT: [[CMP371:%.*]] = icmp slt i32 [[CONV369]], [[CONV370]] +// SIMD-ONLY0-NEXT: br i1 [[CMP371]], label [[COND_TRUE373:%.*]], label [[COND_FALSE375:%.*]] +// SIMD-ONLY0: cond.true373: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV374:%.*]] = zext i16 [[TMP172]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377:%.*]] +// SIMD-ONLY0: cond.false375: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV376:%.*]] = zext i16 [[TMP173]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END377]] +// SIMD-ONLY0: cond.end377: +// SIMD-ONLY0-NEXT: [[COND378:%.*]] = phi i32 [ [[CONV374]], [[COND_TRUE373]] ], [ [[CONV376]], [[COND_FALSE375]] ] +// SIMD-ONLY0-NEXT: [[CONV379:%.*]] = trunc i32 [[COND378]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV379]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP174]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[CONV380:%.*]] = zext i16 [[TMP175]] to i32 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i16, ptr [[USE]], align 2 +// SIMD-ONLY0-NEXT: [[CONV381:%.*]] = zext i16 [[TMP176]] to i32 +// SIMD-ONLY0-NEXT: [[CMP382:%.*]] = icmp eq i32 [[CONV380]], [[CONV381]] +// SIMD-ONLY0-NEXT: br i1 [[CMP382]], label [[COND_TRUE384:%.*]], label [[COND_FALSE386:%.*]] +// SIMD-ONLY0: cond.true384: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i16, ptr [[USD]], align 2 +// SIMD-ONLY0-NEXT: [[CONV385:%.*]] = zext i16 [[TMP177]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388:%.*]] +// SIMD-ONLY0: cond.false386: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i16, ptr [[USX]], align 2 +// 
SIMD-ONLY0-NEXT: [[CONV387:%.*]] = zext i16 [[TMP178]] to i32 +// SIMD-ONLY0-NEXT: br label [[COND_END388]] +// SIMD-ONLY0: cond.end388: +// SIMD-ONLY0-NEXT: [[COND389:%.*]] = phi i32 [ [[CONV385]], [[COND_TRUE384]] ], [ [[CONV387]], [[COND_FALSE386]] ] +// SIMD-ONLY0-NEXT: [[CONV390:%.*]] = trunc i32 [[COND389]] to i16 +// SIMD-ONLY0-NEXT: store i16 [[CONV390]], ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i16, ptr [[USX]], align 2 +// SIMD-ONLY0-NEXT: store i16 [[TMP179]], ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i16, ptr [[USV]], align 2 +// SIMD-ONLY0-NEXT: ret i16 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @ixevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[IX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[IE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[ID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[IX]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND5]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP10]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i32 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND10]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i32, ptr [[IE]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND15]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP19]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND20]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP24]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i32 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i32 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND25]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP29]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP30]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND30]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP35]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: 
+// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND35]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP40]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND40]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp sgt i32 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND45]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP49]], ptr [[IV]], align 4 
+// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND50]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP54]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i32 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND55]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP59]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP60]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load 
i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp sgt i32 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i32 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND60]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP65]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp slt i32 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i32 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND65]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP70]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i32 [[TMP71]], [[TMP72]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND70]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp sgt i32 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i32 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND75]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP79]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp slt i32 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = 
load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i32 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND80]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP84]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i32 [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i32 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND85]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP89]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP90]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp sgt i32 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i32, ptr [[IX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i32 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND90]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP95]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp slt i32 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i32 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND95]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP100]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i32 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i32 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], 
[[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND100]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp sgt i32 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i32 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND105]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP109]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp slt i32 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i32 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND110]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP114]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load 
i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i32 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i32 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND115]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP119]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP120]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp sgt i32 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i32 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND120]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP125]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = 
load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp slt i32 [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND125]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP130]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i32 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i32 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND130]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp sgt i32 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label 
[[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i32 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND135]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP139]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp slt i32 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i32 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND140]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP144]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i32 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i32, ptr [[ID]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i32 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND145]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP149]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP150]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp sgt i32 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i32 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND150]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP155]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp slt i32 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i32, ptr [[IE]], align 
4 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i32 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND155]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP160]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i32 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i32 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND160]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp sgt i32 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: 
[[COND165:%.*]] = phi i32 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND165]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP169]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp slt i32 [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i32 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND170]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP174]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i32, ptr [[IE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i32 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i32, ptr [[ID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i32 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i32 
[[COND175]], ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i32, ptr [[IX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP179]], ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i32, ptr [[IV]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @uixevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[UIX:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UIE:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[UID:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i32 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND5]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP10]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i32 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND10]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp ugt i32 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// 
SIMD-ONLY0-NEXT: store i32 [[COND15]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP19]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp ult i32 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND20]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP24]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i32 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i32 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND25]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 
[[TMP29]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP30]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp ugt i32 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND30]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP35]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND35]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP40]], ptr [[UIV]], 
align 4 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = icmp eq i32 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND40]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp ugt i32 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i32 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND45]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP49]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp ult i32 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label 
[[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i32 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND50]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP54]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i32 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i32 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND55]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP59]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP60]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp ugt i32 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// 
SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i32 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND60]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP65]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp ult i32 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i32 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND65]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP70]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i32 [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: 
cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i32 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND70]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp ugt i32 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i32 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND75]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP79]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp ult i32 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i32 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: 
store i32 [[COND80]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP84]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i32 [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i32 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND85]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP89]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP90]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp ugt i32 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i32 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND90]], ptr 
[[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP95]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp ult i32 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i32 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND95]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP100]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i32 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i32 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND100]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i32, ptr [[UIE]], 
align 4 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp ugt i32 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i32 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND105]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP109]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp ult i32 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i32 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND110]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP114]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i32 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i32 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND115]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP119]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP120]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp ugt i32 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i32 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND120]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP125]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp ult i32 [[TMP126]], [[TMP127]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i32 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND125]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP130]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i32 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i32 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND130]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp ugt i32 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// 
SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i32 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND135]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP139]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp ult i32 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i32 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND140]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP144]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i32 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i32, ptr [[UIX]], align 4 +// 
SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i32 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND145]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP149]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP150]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp ugt i32 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i32 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND150]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP155]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp ult i32 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i32, ptr 
[[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i32 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND155]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP160]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i32 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i32 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND160]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp ugt i32 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi i32 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i32 
[[COND165]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP169]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp ult i32 [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i32 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND170]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP174]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i32, ptr [[UIE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i32 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i32, ptr [[UID]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i32, ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i32 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i32 [[COND175]], ptr [[UIX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i32, ptr [[UIX]], align 4 +// 
SIMD-ONLY0-NEXT: store i32 [[TMP179]], ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i32, ptr [[UIV]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @lxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[LX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP0]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP5]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp slt i64 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND5]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP10]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND10]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND15]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP19]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp slt i64 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND20]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP24]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND25]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP29]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP30]], ptr 
[[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND30]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP35]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND35]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP40]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: 
[[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND40]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND45]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP49]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp slt i64 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// 
SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND50]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP54]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND55]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP59]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP60]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp sgt i64 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// 
SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND60]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP65]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp slt i64 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND65]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP70]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] 
= phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND70]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp sgt i64 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND75]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP79]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp slt i64 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND80]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP84]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] 
= load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND85]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP89]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP90]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp sgt i64 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND90]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP95]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i64, ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp slt i64 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND95]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP100]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND100]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp sgt i64 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: 
[[TMP107:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND105]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP109]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp slt i64 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND110]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP114]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: 
cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND115]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP119]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP120]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp sgt i64 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND120]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP125]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp slt i64 [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// 
SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND125]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP130]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND130]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp sgt i64 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ 
[[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND135]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP139]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp slt i64 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND140]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP144]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND145]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP149:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP149]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP150]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp sgt i64 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND150]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP155]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp slt i64 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND155]], ptr [[LX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP160]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND160]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp sgt i64 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND165]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP169]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i64, ptr [[LE]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp slt i64 [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND170]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP174]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i64, ptr [[LE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i64, ptr [[LD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND175]], ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i64, ptr [[LX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP179]], ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i64, ptr [[LV]], align 8 +// SIMD-ONLY0-NEXT: ret i64 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @ulxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[ULX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: 
[[ULV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP0]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP5]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND5]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i64, ptr [[ULX]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP10]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND10]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp ugt i64 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND15]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP19]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND20]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP24]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND25]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP29]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP30]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp ugt i64 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label 
[[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND30]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP35]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp ult i64 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND35]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP40]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i64, ptr [[ULD]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND40]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp ugt i64 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND45]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP49]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp ult i64 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i64 [ [[TMP52]], 
[[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND50]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP54]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND55]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP59]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP60]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp ugt i64 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ 
[[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND60]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP65]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp ult i64 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND65]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP70]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND70]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i64, ptr [[ULX]], 
align 8 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp ugt i64 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND75]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP79]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp ult i64 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND80]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP84]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND85]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP89]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP90]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp ugt i64 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND90]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP95]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp ult i64 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 
[[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND95]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP100]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND100]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp ugt i64 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: 
[[TMP108:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND105]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP109]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp ult i64 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND110]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP114]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: 
cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND115]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP119]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP120]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp ugt i64 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND120]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP125]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp ult i64 [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND125]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP130]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND130]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp ugt i64 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND135]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP139:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP139]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp ult i64 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND140]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP144]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND145]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP149]], ptr [[ULV]], align 8 
+// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP150]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp ugt i64 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND150]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP155]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp ult i64 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND155]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP160]], 
ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND160]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp ugt i64 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND165]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP169]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp ult i64 [[TMP170]], [[TMP171]] +// 
SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND170]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP174]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i64, ptr [[ULE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i64, ptr [[ULD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND175]], ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i64, ptr [[ULX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP179]], ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i64, ptr [[ULV]], align 8 +// SIMD-ONLY0-NEXT: ret i64 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @llxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[LLX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[LLE:%.*]] = alloca 
i64, align 8 +// SIMD-ONLY0-NEXT: [[LLD:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP0]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP5]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp slt i64 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND5]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP10]], ptr [[LLV]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND10]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND15]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP19]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp slt i64 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// 
SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND20]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP24]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND25]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP29]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP30]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp sgt i64 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: 
+// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND30]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP35]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp slt i64 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND35]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP40]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// 
SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND40]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp sgt i64 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND45]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP49]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp slt i64 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store i64 
[[COND50]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP54]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND55]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP59]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP60]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp sgt i64 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND60]], ptr [[LLX]], 
align 8 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP65]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp slt i64 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND65]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP70]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND70]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load i64, ptr [[LLE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp sgt i64 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND75]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP79]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp slt i64 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND80]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP84]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] 
+// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND85]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP89]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP90]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp sgt i64 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND90]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP95]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp slt i64 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: 
cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND95]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP100]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND100]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp sgt i64 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND105]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP109]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp slt i64 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND110]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP114]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i64 [ [[TMP117]], 
[[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND115]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP119]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP120]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp sgt i64 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND120]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP125]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp slt i64 [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = 
phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND125]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP130]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND130]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp sgt i64 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND135]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 
[[TMP139]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp slt i64 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND140]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP144]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND145]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP149]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i64, ptr [[LLX]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP150]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp sgt i64 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND150]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP155]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp slt i64 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND155]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP160]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i64, ptr 
[[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND160]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp sgt i64 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND165]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP169]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp slt i64 [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label 
[[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND170]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP174]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i64, ptr [[LLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i64, ptr [[LLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND175]], ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i64, ptr [[LLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP179]], ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i64, ptr [[LLV]], align 8 +// SIMD-ONLY0-NEXT: ret i64 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @ullxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[ULLX:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLV:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLE:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[ULLD:%.*]] = alloca i64, 
align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP0]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi i64 [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP5]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = icmp ult i64 [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi i64 [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND5]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP10]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i64, ptr [[ULLX]], 
align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = icmp eq i64 [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi i64 [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND10]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = icmp ugt i64 [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi i64 [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND15]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP19]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = icmp ult i64 [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// 
SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND20]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP24]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = icmp eq i64 [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi i64 [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND25]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP29]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP30]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = icmp ugt i64 [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// 
SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi i64 [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND30]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP35]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = icmp ult i64 [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND35]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP40]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = icmp eq i64 [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: 
+// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND40]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = icmp ugt i64 [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi i64 [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND45]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP49]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = icmp ult i64 [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi i64 [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: 
store i64 [[COND50]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP54]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = icmp eq i64 [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi i64 [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND55]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP59]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP60]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = icmp ugt i64 [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi i64 [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store i64 
[[COND60]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP65]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = icmp ult i64 [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi i64 [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND65]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP70]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = icmp eq i64 [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi i64 [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND70]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load 
i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = icmp ugt i64 [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi i64 [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND75]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP79]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = icmp ult i64 [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi i64 [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND80]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP84]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = icmp eq i64 [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label 
[[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi i64 [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND85]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP89]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP90]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = icmp ugt i64 [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi i64 [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND90]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP95]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = icmp ult i64 [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label 
[[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi i64 [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND95]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP100]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = icmp eq i64 [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi i64 [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND100]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = icmp ugt i64 [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: 
[[TMP108:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi i64 [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND105]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP109]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = icmp ult i64 [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi i64 [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND110]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP114]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = icmp eq i64 [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// 
SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi i64 [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND115]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP119]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP120]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = icmp ugt i64 [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi i64 [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND120]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP125]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = icmp ult i64 [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi i64 [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND125]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP130]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = icmp eq i64 [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi i64 [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND130]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = icmp ugt i64 [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi i64 [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND135]], ptr 
[[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP139]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = icmp ult i64 [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi i64 [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND140]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP144]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = icmp eq i64 [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi i64 [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND145]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load i64, ptr [[ULLX]], align 8 +// 
SIMD-ONLY0-NEXT: store i64 [[TMP149]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP150]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = icmp ugt i64 [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi i64 [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND150]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP155]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = icmp ult i64 [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi i64 [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND155]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] 
= load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP160]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = icmp eq i64 [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi i64 [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND160]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = icmp ugt i64 [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi i64 [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND165]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP169]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load i64, ptr [[ULLE]], 
align 8 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = icmp ult i64 [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi i64 [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND170]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP174]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load i64, ptr [[ULLE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = icmp eq i64 [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load i64, ptr [[ULLD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi i64 [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store i64 [[COND175]], ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load i64, ptr [[ULLX]], align 8 +// SIMD-ONLY0-NEXT: store i64 [[TMP179]], ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load i64, ptr [[ULLV]], align 8 +// SIMD-ONLY0-NEXT: ret i64 [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @fxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[FX:%.*]] = alloca float, align 
4 +// SIMD-ONLY0-NEXT: [[FV:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FE:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[FD:%.*]] = alloca float, align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP0]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = fcmp ogt float [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi float [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store float [[COND]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP5]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = fcmp olt float [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi float [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store float [[COND5]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP10:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP10]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = fcmp oeq float [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi float [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store float [[COND10]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = fcmp ogt float [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi float [ [[TMP17]], [[COND_TRUE12]] ], [ [[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store float [[COND15]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP19]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP16:%.*]] = fcmp olt float [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi float [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store float [[COND20]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP24]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = fcmp oeq float [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi float [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store float [[COND25]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP29:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP29]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP30]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: 
[[CMP26:%.*]] = fcmp ogt float [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi float [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store float [[COND30]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP35]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = fcmp olt float [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi float [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store float [[COND35]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP40]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = fcmp oeq float [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label 
[[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi float [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store float [[COND40]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = fcmp ogt float [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi float [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store float [[COND45]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP49]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = fcmp olt float [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load float, ptr [[FX]], align 4 
+// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi float [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store float [[COND50]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP54]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = fcmp oeq float [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi float [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store float [[COND55]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP59]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP60]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP62:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = fcmp ogt float [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load float, ptr [[FX]], align 4 
+// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi float [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store float [[COND60]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP65]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = fcmp olt float [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi float [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store float [[COND65]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP70]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = fcmp oeq float [[TMP71]], [[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi float [ [[TMP73]], 
[[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store float [[COND70]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = fcmp ogt float [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi float [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store float [[COND75]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP79]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = fcmp olt float [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] +// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi float [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store float [[COND80]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP84]], ptr [[FV]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = fcmp oeq float [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi float [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store float [[COND85]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP89]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP90]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = fcmp ogt float [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi float [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store float [[COND90]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP95]], ptr [[FV]], align 4 +// 
SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = fcmp olt float [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi float [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store float [[COND95]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP100]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = fcmp oeq float [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi float [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store float [[COND100]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = fcmp ogt float [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label 
[[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi float [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store float [[COND105]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP109]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = fcmp olt float [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi float [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// SIMD-ONLY0-NEXT: store float [[COND110]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP114]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = fcmp oeq float [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// 
SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi float [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store float [[COND115]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP119]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP120]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = fcmp ogt float [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi float [ [[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store float [[COND120]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP125]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = fcmp olt float [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label 
[[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi float [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store float [[COND125]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP130]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = fcmp oeq float [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi float [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// SIMD-ONLY0-NEXT: store float [[COND130]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = fcmp ogt float [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: 
[[TMP138:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi float [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store float [[COND135]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP139]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = fcmp olt float [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi float [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store float [[COND140]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP144]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = fcmp oeq float [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label 
[[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi float [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store float [[COND145]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP149]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP150]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = fcmp ogt float [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi float [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store float [[COND150]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP155]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = fcmp olt float [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load float, ptr 
[[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi float [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store float [[COND155]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP160]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = fcmp oeq float [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi float [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store float [[COND160]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP161:%.*]] = fcmp ogt float [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi float [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// 
SIMD-ONLY0-NEXT: store float [[COND165]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP169]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = fcmp olt float [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi float [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store float [[COND170]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP174]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load float, ptr [[FE]], align 4 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = fcmp oeq float [[TMP175]], [[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load float, ptr [[FD]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi float [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store float [[COND175]], ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: 
[[TMP179:%.*]] = load float, ptr [[FX]], align 4 +// SIMD-ONLY0-NEXT: store float [[TMP179]], ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load float, ptr [[FV]], align 4 +// SIMD-ONLY0-NEXT: ret float [[TMP180]] +// +// +// SIMD-ONLY0-LABEL: @dxevd( +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[DX:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DV:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DE:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[DD:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP0]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// SIMD-ONLY0: cond.true: +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END:%.*]] +// SIMD-ONLY0: cond.false: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END]] +// SIMD-ONLY0: cond.end: +// SIMD-ONLY0-NEXT: [[COND:%.*]] = phi double [ [[TMP3]], [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// SIMD-ONLY0-NEXT: store double [[COND]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP5]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP1:%.*]] = fcmp olt double [[TMP6]], [[TMP7]] +// SIMD-ONLY0-NEXT: br i1 [[CMP1]], label [[COND_TRUE2:%.*]], label [[COND_FALSE3:%.*]] +// SIMD-ONLY0: cond.true2: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END4:%.*]] +// SIMD-ONLY0: cond.false3: +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END4]] +// SIMD-ONLY0: cond.end4: +// SIMD-ONLY0-NEXT: [[COND5:%.*]] = phi double [ [[TMP8]], [[COND_TRUE2]] ], [ [[TMP9]], [[COND_FALSE3]] ] +// SIMD-ONLY0-NEXT: store double [[COND5]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP10]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP6:%.*]] = fcmp oeq double [[TMP11]], [[TMP12]] +// SIMD-ONLY0-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// SIMD-ONLY0: cond.true7: +// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9:%.*]] +// SIMD-ONLY0: cond.false8: +// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END9]] +// SIMD-ONLY0: cond.end9: +// SIMD-ONLY0-NEXT: [[COND10:%.*]] = phi double [ [[TMP13]], [[COND_TRUE7]] ], [ [[TMP14]], [[COND_FALSE8]] ] +// SIMD-ONLY0-NEXT: store double [[COND10]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP16:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP15]], [[TMP16]] +// SIMD-ONLY0-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// SIMD-ONLY0: cond.true12: +// SIMD-ONLY0-NEXT: [[TMP17:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14:%.*]] +// SIMD-ONLY0: cond.false13: +// SIMD-ONLY0-NEXT: [[TMP18:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END14]] +// SIMD-ONLY0: cond.end14: +// SIMD-ONLY0-NEXT: [[COND15:%.*]] = phi double [ [[TMP17]], [[COND_TRUE12]] ], [ 
[[TMP18]], [[COND_FALSE13]] ] +// SIMD-ONLY0-NEXT: store double [[COND15]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP19:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP19]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP20:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP21:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP16:%.*]] = fcmp olt double [[TMP20]], [[TMP21]] +// SIMD-ONLY0-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// SIMD-ONLY0: cond.true17: +// SIMD-ONLY0-NEXT: [[TMP22:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19:%.*]] +// SIMD-ONLY0: cond.false18: +// SIMD-ONLY0-NEXT: [[TMP23:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END19]] +// SIMD-ONLY0: cond.end19: +// SIMD-ONLY0-NEXT: [[COND20:%.*]] = phi double [ [[TMP22]], [[COND_TRUE17]] ], [ [[TMP23]], [[COND_FALSE18]] ] +// SIMD-ONLY0-NEXT: store double [[COND20]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP24:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP24]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP25:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP26:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP21:%.*]] = fcmp oeq double [[TMP25]], [[TMP26]] +// SIMD-ONLY0-NEXT: br i1 [[CMP21]], label [[COND_TRUE22:%.*]], label [[COND_FALSE23:%.*]] +// SIMD-ONLY0: cond.true22: +// SIMD-ONLY0-NEXT: [[TMP27:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24:%.*]] +// SIMD-ONLY0: cond.false23: +// SIMD-ONLY0-NEXT: [[TMP28:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END24]] +// SIMD-ONLY0: cond.end24: +// SIMD-ONLY0-NEXT: [[COND25:%.*]] = phi double [ [[TMP27]], [[COND_TRUE22]] ], [ [[TMP28]], [[COND_FALSE23]] ] +// SIMD-ONLY0-NEXT: store double [[COND25]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP29:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP29]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP30:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP30]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP31:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP32:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP26:%.*]] = fcmp ogt double [[TMP31]], [[TMP32]] +// SIMD-ONLY0-NEXT: br i1 [[CMP26]], label [[COND_TRUE27:%.*]], label [[COND_FALSE28:%.*]] +// SIMD-ONLY0: cond.true27: +// SIMD-ONLY0-NEXT: [[TMP33:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29:%.*]] +// SIMD-ONLY0: cond.false28: +// SIMD-ONLY0-NEXT: [[TMP34:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END29]] +// SIMD-ONLY0: cond.end29: +// SIMD-ONLY0-NEXT: [[COND30:%.*]] = phi double [ [[TMP33]], [[COND_TRUE27]] ], [ [[TMP34]], [[COND_FALSE28]] ] +// SIMD-ONLY0-NEXT: store double [[COND30]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP35:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP35]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP36:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP37:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP31:%.*]] = fcmp olt double [[TMP36]], [[TMP37]] +// SIMD-ONLY0-NEXT: br i1 [[CMP31]], label [[COND_TRUE32:%.*]], label [[COND_FALSE33:%.*]] +// SIMD-ONLY0: cond.true32: +// SIMD-ONLY0-NEXT: [[TMP38:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34:%.*]] +// SIMD-ONLY0: cond.false33: +// SIMD-ONLY0-NEXT: [[TMP39:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END34]] +// SIMD-ONLY0: cond.end34: +// SIMD-ONLY0-NEXT: [[COND35:%.*]] = phi double [ [[TMP38]], [[COND_TRUE32]] ], [ [[TMP39]], [[COND_FALSE33]] ] +// SIMD-ONLY0-NEXT: store double [[COND35]], ptr [[DX]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP40:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP40]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP41:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP42:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP36:%.*]] = fcmp oeq double [[TMP41]], [[TMP42]] +// SIMD-ONLY0-NEXT: br i1 [[CMP36]], label [[COND_TRUE37:%.*]], label [[COND_FALSE38:%.*]] +// SIMD-ONLY0: cond.true37: +// SIMD-ONLY0-NEXT: [[TMP43:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39:%.*]] +// SIMD-ONLY0: cond.false38: +// SIMD-ONLY0-NEXT: [[TMP44:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END39]] +// SIMD-ONLY0: cond.end39: +// SIMD-ONLY0-NEXT: [[COND40:%.*]] = phi double [ [[TMP43]], [[COND_TRUE37]] ], [ [[TMP44]], [[COND_FALSE38]] ] +// SIMD-ONLY0-NEXT: store double [[COND40]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP45:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP46:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP41:%.*]] = fcmp ogt double [[TMP45]], [[TMP46]] +// SIMD-ONLY0-NEXT: br i1 [[CMP41]], label [[COND_TRUE42:%.*]], label [[COND_FALSE43:%.*]] +// SIMD-ONLY0: cond.true42: +// SIMD-ONLY0-NEXT: [[TMP47:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44:%.*]] +// SIMD-ONLY0: cond.false43: +// SIMD-ONLY0-NEXT: [[TMP48:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END44]] +// SIMD-ONLY0: cond.end44: +// SIMD-ONLY0-NEXT: [[COND45:%.*]] = phi double [ [[TMP47]], [[COND_TRUE42]] ], [ [[TMP48]], [[COND_FALSE43]] ] +// SIMD-ONLY0-NEXT: store double [[COND45]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP49:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP49]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP50:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP51:%.*]] = load double, 
ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP46:%.*]] = fcmp olt double [[TMP50]], [[TMP51]] +// SIMD-ONLY0-NEXT: br i1 [[CMP46]], label [[COND_TRUE47:%.*]], label [[COND_FALSE48:%.*]] +// SIMD-ONLY0: cond.true47: +// SIMD-ONLY0-NEXT: [[TMP52:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49:%.*]] +// SIMD-ONLY0: cond.false48: +// SIMD-ONLY0-NEXT: [[TMP53:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END49]] +// SIMD-ONLY0: cond.end49: +// SIMD-ONLY0-NEXT: [[COND50:%.*]] = phi double [ [[TMP52]], [[COND_TRUE47]] ], [ [[TMP53]], [[COND_FALSE48]] ] +// SIMD-ONLY0-NEXT: store double [[COND50]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP54:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP54]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP55:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP56:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP51:%.*]] = fcmp oeq double [[TMP55]], [[TMP56]] +// SIMD-ONLY0-NEXT: br i1 [[CMP51]], label [[COND_TRUE52:%.*]], label [[COND_FALSE53:%.*]] +// SIMD-ONLY0: cond.true52: +// SIMD-ONLY0-NEXT: [[TMP57:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54:%.*]] +// SIMD-ONLY0: cond.false53: +// SIMD-ONLY0-NEXT: [[TMP58:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END54]] +// SIMD-ONLY0: cond.end54: +// SIMD-ONLY0-NEXT: [[COND55:%.*]] = phi double [ [[TMP57]], [[COND_TRUE52]] ], [ [[TMP58]], [[COND_FALSE53]] ] +// SIMD-ONLY0-NEXT: store double [[COND55]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP59:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP59]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP60:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP60]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP61:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: 
[[TMP62:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP56:%.*]] = fcmp ogt double [[TMP61]], [[TMP62]] +// SIMD-ONLY0-NEXT: br i1 [[CMP56]], label [[COND_TRUE57:%.*]], label [[COND_FALSE58:%.*]] +// SIMD-ONLY0: cond.true57: +// SIMD-ONLY0-NEXT: [[TMP63:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59:%.*]] +// SIMD-ONLY0: cond.false58: +// SIMD-ONLY0-NEXT: [[TMP64:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END59]] +// SIMD-ONLY0: cond.end59: +// SIMD-ONLY0-NEXT: [[COND60:%.*]] = phi double [ [[TMP63]], [[COND_TRUE57]] ], [ [[TMP64]], [[COND_FALSE58]] ] +// SIMD-ONLY0-NEXT: store double [[COND60]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP65:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP65]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP66:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP67:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP61:%.*]] = fcmp olt double [[TMP66]], [[TMP67]] +// SIMD-ONLY0-NEXT: br i1 [[CMP61]], label [[COND_TRUE62:%.*]], label [[COND_FALSE63:%.*]] +// SIMD-ONLY0: cond.true62: +// SIMD-ONLY0-NEXT: [[TMP68:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64:%.*]] +// SIMD-ONLY0: cond.false63: +// SIMD-ONLY0-NEXT: [[TMP69:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END64]] +// SIMD-ONLY0: cond.end64: +// SIMD-ONLY0-NEXT: [[COND65:%.*]] = phi double [ [[TMP68]], [[COND_TRUE62]] ], [ [[TMP69]], [[COND_FALSE63]] ] +// SIMD-ONLY0-NEXT: store double [[COND65]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP70:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP70]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP71:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP72:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP66:%.*]] = fcmp oeq double [[TMP71]], 
[[TMP72]] +// SIMD-ONLY0-NEXT: br i1 [[CMP66]], label [[COND_TRUE67:%.*]], label [[COND_FALSE68:%.*]] +// SIMD-ONLY0: cond.true67: +// SIMD-ONLY0-NEXT: [[TMP73:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69:%.*]] +// SIMD-ONLY0: cond.false68: +// SIMD-ONLY0-NEXT: [[TMP74:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END69]] +// SIMD-ONLY0: cond.end69: +// SIMD-ONLY0-NEXT: [[COND70:%.*]] = phi double [ [[TMP73]], [[COND_TRUE67]] ], [ [[TMP74]], [[COND_FALSE68]] ] +// SIMD-ONLY0-NEXT: store double [[COND70]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP75:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP76:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP71:%.*]] = fcmp ogt double [[TMP75]], [[TMP76]] +// SIMD-ONLY0-NEXT: br i1 [[CMP71]], label [[COND_TRUE72:%.*]], label [[COND_FALSE73:%.*]] +// SIMD-ONLY0: cond.true72: +// SIMD-ONLY0-NEXT: [[TMP77:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74:%.*]] +// SIMD-ONLY0: cond.false73: +// SIMD-ONLY0-NEXT: [[TMP78:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END74]] +// SIMD-ONLY0: cond.end74: +// SIMD-ONLY0-NEXT: [[COND75:%.*]] = phi double [ [[TMP77]], [[COND_TRUE72]] ], [ [[TMP78]], [[COND_FALSE73]] ] +// SIMD-ONLY0-NEXT: store double [[COND75]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP79:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP79]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP80:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP81:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP76:%.*]] = fcmp olt double [[TMP80]], [[TMP81]] +// SIMD-ONLY0-NEXT: br i1 [[CMP76]], label [[COND_TRUE77:%.*]], label [[COND_FALSE78:%.*]] +// SIMD-ONLY0: cond.true77: +// SIMD-ONLY0-NEXT: [[TMP82:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79:%.*]] 
+// SIMD-ONLY0: cond.false78: +// SIMD-ONLY0-NEXT: [[TMP83:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END79]] +// SIMD-ONLY0: cond.end79: +// SIMD-ONLY0-NEXT: [[COND80:%.*]] = phi double [ [[TMP82]], [[COND_TRUE77]] ], [ [[TMP83]], [[COND_FALSE78]] ] +// SIMD-ONLY0-NEXT: store double [[COND80]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP84:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP84]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP85:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP86:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP81:%.*]] = fcmp oeq double [[TMP85]], [[TMP86]] +// SIMD-ONLY0-NEXT: br i1 [[CMP81]], label [[COND_TRUE82:%.*]], label [[COND_FALSE83:%.*]] +// SIMD-ONLY0: cond.true82: +// SIMD-ONLY0-NEXT: [[TMP87:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84:%.*]] +// SIMD-ONLY0: cond.false83: +// SIMD-ONLY0-NEXT: [[TMP88:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END84]] +// SIMD-ONLY0: cond.end84: +// SIMD-ONLY0-NEXT: [[COND85:%.*]] = phi double [ [[TMP87]], [[COND_TRUE82]] ], [ [[TMP88]], [[COND_FALSE83]] ] +// SIMD-ONLY0-NEXT: store double [[COND85]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP89:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP89]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP90:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP90]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP91:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP92:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP86:%.*]] = fcmp ogt double [[TMP91]], [[TMP92]] +// SIMD-ONLY0-NEXT: br i1 [[CMP86]], label [[COND_TRUE87:%.*]], label [[COND_FALSE88:%.*]] +// SIMD-ONLY0: cond.true87: +// SIMD-ONLY0-NEXT: [[TMP93:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label 
[[COND_END89:%.*]] +// SIMD-ONLY0: cond.false88: +// SIMD-ONLY0-NEXT: [[TMP94:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END89]] +// SIMD-ONLY0: cond.end89: +// SIMD-ONLY0-NEXT: [[COND90:%.*]] = phi double [ [[TMP93]], [[COND_TRUE87]] ], [ [[TMP94]], [[COND_FALSE88]] ] +// SIMD-ONLY0-NEXT: store double [[COND90]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP95:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP95]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP96:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP97:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP91:%.*]] = fcmp olt double [[TMP96]], [[TMP97]] +// SIMD-ONLY0-NEXT: br i1 [[CMP91]], label [[COND_TRUE92:%.*]], label [[COND_FALSE93:%.*]] +// SIMD-ONLY0: cond.true92: +// SIMD-ONLY0-NEXT: [[TMP98:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94:%.*]] +// SIMD-ONLY0: cond.false93: +// SIMD-ONLY0-NEXT: [[TMP99:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END94]] +// SIMD-ONLY0: cond.end94: +// SIMD-ONLY0-NEXT: [[COND95:%.*]] = phi double [ [[TMP98]], [[COND_TRUE92]] ], [ [[TMP99]], [[COND_FALSE93]] ] +// SIMD-ONLY0-NEXT: store double [[COND95]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP100:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP100]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP101:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP102:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP96:%.*]] = fcmp oeq double [[TMP101]], [[TMP102]] +// SIMD-ONLY0-NEXT: br i1 [[CMP96]], label [[COND_TRUE97:%.*]], label [[COND_FALSE98:%.*]] +// SIMD-ONLY0: cond.true97: +// SIMD-ONLY0-NEXT: [[TMP103:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99:%.*]] +// SIMD-ONLY0: cond.false98: +// SIMD-ONLY0-NEXT: [[TMP104:%.*]] = load double, ptr 
[[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END99]] +// SIMD-ONLY0: cond.end99: +// SIMD-ONLY0-NEXT: [[COND100:%.*]] = phi double [ [[TMP103]], [[COND_TRUE97]] ], [ [[TMP104]], [[COND_FALSE98]] ] +// SIMD-ONLY0-NEXT: store double [[COND100]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP105:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP106:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP101:%.*]] = fcmp ogt double [[TMP105]], [[TMP106]] +// SIMD-ONLY0-NEXT: br i1 [[CMP101]], label [[COND_TRUE102:%.*]], label [[COND_FALSE103:%.*]] +// SIMD-ONLY0: cond.true102: +// SIMD-ONLY0-NEXT: [[TMP107:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104:%.*]] +// SIMD-ONLY0: cond.false103: +// SIMD-ONLY0-NEXT: [[TMP108:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END104]] +// SIMD-ONLY0: cond.end104: +// SIMD-ONLY0-NEXT: [[COND105:%.*]] = phi double [ [[TMP107]], [[COND_TRUE102]] ], [ [[TMP108]], [[COND_FALSE103]] ] +// SIMD-ONLY0-NEXT: store double [[COND105]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP109:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP109]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP110:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP111:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP106:%.*]] = fcmp olt double [[TMP110]], [[TMP111]] +// SIMD-ONLY0-NEXT: br i1 [[CMP106]], label [[COND_TRUE107:%.*]], label [[COND_FALSE108:%.*]] +// SIMD-ONLY0: cond.true107: +// SIMD-ONLY0-NEXT: [[TMP112:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109:%.*]] +// SIMD-ONLY0: cond.false108: +// SIMD-ONLY0-NEXT: [[TMP113:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END109]] +// SIMD-ONLY0: cond.end109: +// SIMD-ONLY0-NEXT: [[COND110:%.*]] = phi double [ [[TMP112]], [[COND_TRUE107]] ], [ [[TMP113]], [[COND_FALSE108]] ] +// 
SIMD-ONLY0-NEXT: store double [[COND110]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP114:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP114]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP115:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP116:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP111:%.*]] = fcmp oeq double [[TMP115]], [[TMP116]] +// SIMD-ONLY0-NEXT: br i1 [[CMP111]], label [[COND_TRUE112:%.*]], label [[COND_FALSE113:%.*]] +// SIMD-ONLY0: cond.true112: +// SIMD-ONLY0-NEXT: [[TMP117:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114:%.*]] +// SIMD-ONLY0: cond.false113: +// SIMD-ONLY0-NEXT: [[TMP118:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END114]] +// SIMD-ONLY0: cond.end114: +// SIMD-ONLY0-NEXT: [[COND115:%.*]] = phi double [ [[TMP117]], [[COND_TRUE112]] ], [ [[TMP118]], [[COND_FALSE113]] ] +// SIMD-ONLY0-NEXT: store double [[COND115]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP119:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP119]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP120:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP120]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP121:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP122:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP116:%.*]] = fcmp ogt double [[TMP121]], [[TMP122]] +// SIMD-ONLY0-NEXT: br i1 [[CMP116]], label [[COND_TRUE117:%.*]], label [[COND_FALSE118:%.*]] +// SIMD-ONLY0: cond.true117: +// SIMD-ONLY0-NEXT: [[TMP123:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119:%.*]] +// SIMD-ONLY0: cond.false118: +// SIMD-ONLY0-NEXT: [[TMP124:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END119]] +// SIMD-ONLY0: cond.end119: +// SIMD-ONLY0-NEXT: [[COND120:%.*]] = phi double [ 
[[TMP123]], [[COND_TRUE117]] ], [ [[TMP124]], [[COND_FALSE118]] ] +// SIMD-ONLY0-NEXT: store double [[COND120]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP125:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP125]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP126:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP127:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP121:%.*]] = fcmp olt double [[TMP126]], [[TMP127]] +// SIMD-ONLY0-NEXT: br i1 [[CMP121]], label [[COND_TRUE122:%.*]], label [[COND_FALSE123:%.*]] +// SIMD-ONLY0: cond.true122: +// SIMD-ONLY0-NEXT: [[TMP128:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124:%.*]] +// SIMD-ONLY0: cond.false123: +// SIMD-ONLY0-NEXT: [[TMP129:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END124]] +// SIMD-ONLY0: cond.end124: +// SIMD-ONLY0-NEXT: [[COND125:%.*]] = phi double [ [[TMP128]], [[COND_TRUE122]] ], [ [[TMP129]], [[COND_FALSE123]] ] +// SIMD-ONLY0-NEXT: store double [[COND125]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP130:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP130]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP131:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP132:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP126:%.*]] = fcmp oeq double [[TMP131]], [[TMP132]] +// SIMD-ONLY0-NEXT: br i1 [[CMP126]], label [[COND_TRUE127:%.*]], label [[COND_FALSE128:%.*]] +// SIMD-ONLY0: cond.true127: +// SIMD-ONLY0-NEXT: [[TMP133:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129:%.*]] +// SIMD-ONLY0: cond.false128: +// SIMD-ONLY0-NEXT: [[TMP134:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END129]] +// SIMD-ONLY0: cond.end129: +// SIMD-ONLY0-NEXT: [[COND130:%.*]] = phi double [ [[TMP133]], [[COND_TRUE127]] ], [ [[TMP134]], [[COND_FALSE128]] ] +// 
SIMD-ONLY0-NEXT: store double [[COND130]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP135:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP136:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP131:%.*]] = fcmp ogt double [[TMP135]], [[TMP136]] +// SIMD-ONLY0-NEXT: br i1 [[CMP131]], label [[COND_TRUE132:%.*]], label [[COND_FALSE133:%.*]] +// SIMD-ONLY0: cond.true132: +// SIMD-ONLY0-NEXT: [[TMP137:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134:%.*]] +// SIMD-ONLY0: cond.false133: +// SIMD-ONLY0-NEXT: [[TMP138:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END134]] +// SIMD-ONLY0: cond.end134: +// SIMD-ONLY0-NEXT: [[COND135:%.*]] = phi double [ [[TMP137]], [[COND_TRUE132]] ], [ [[TMP138]], [[COND_FALSE133]] ] +// SIMD-ONLY0-NEXT: store double [[COND135]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP139:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP139]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP140:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP141:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP136:%.*]] = fcmp olt double [[TMP140]], [[TMP141]] +// SIMD-ONLY0-NEXT: br i1 [[CMP136]], label [[COND_TRUE137:%.*]], label [[COND_FALSE138:%.*]] +// SIMD-ONLY0: cond.true137: +// SIMD-ONLY0-NEXT: [[TMP142:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139:%.*]] +// SIMD-ONLY0: cond.false138: +// SIMD-ONLY0-NEXT: [[TMP143:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END139]] +// SIMD-ONLY0: cond.end139: +// SIMD-ONLY0-NEXT: [[COND140:%.*]] = phi double [ [[TMP142]], [[COND_TRUE137]] ], [ [[TMP143]], [[COND_FALSE138]] ] +// SIMD-ONLY0-NEXT: store double [[COND140]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP144:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP144]], ptr [[DV]], align 8 +// 
SIMD-ONLY0-NEXT: [[TMP145:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP146:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP141:%.*]] = fcmp oeq double [[TMP145]], [[TMP146]] +// SIMD-ONLY0-NEXT: br i1 [[CMP141]], label [[COND_TRUE142:%.*]], label [[COND_FALSE143:%.*]] +// SIMD-ONLY0: cond.true142: +// SIMD-ONLY0-NEXT: [[TMP147:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144:%.*]] +// SIMD-ONLY0: cond.false143: +// SIMD-ONLY0-NEXT: [[TMP148:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END144]] +// SIMD-ONLY0: cond.end144: +// SIMD-ONLY0-NEXT: [[COND145:%.*]] = phi double [ [[TMP147]], [[COND_TRUE142]] ], [ [[TMP148]], [[COND_FALSE143]] ] +// SIMD-ONLY0-NEXT: store double [[COND145]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP149:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP149]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP150:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP150]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP151:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP152:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP146:%.*]] = fcmp ogt double [[TMP151]], [[TMP152]] +// SIMD-ONLY0-NEXT: br i1 [[CMP146]], label [[COND_TRUE147:%.*]], label [[COND_FALSE148:%.*]] +// SIMD-ONLY0: cond.true147: +// SIMD-ONLY0-NEXT: [[TMP153:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149:%.*]] +// SIMD-ONLY0: cond.false148: +// SIMD-ONLY0-NEXT: [[TMP154:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END149]] +// SIMD-ONLY0: cond.end149: +// SIMD-ONLY0-NEXT: [[COND150:%.*]] = phi double [ [[TMP153]], [[COND_TRUE147]] ], [ [[TMP154]], [[COND_FALSE148]] ] +// SIMD-ONLY0-NEXT: store double [[COND150]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP155:%.*]] = load double, ptr [[DX]], align 8 +// 
SIMD-ONLY0-NEXT: store double [[TMP155]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP156:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP157:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP151:%.*]] = fcmp olt double [[TMP156]], [[TMP157]] +// SIMD-ONLY0-NEXT: br i1 [[CMP151]], label [[COND_TRUE152:%.*]], label [[COND_FALSE153:%.*]] +// SIMD-ONLY0: cond.true152: +// SIMD-ONLY0-NEXT: [[TMP158:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154:%.*]] +// SIMD-ONLY0: cond.false153: +// SIMD-ONLY0-NEXT: [[TMP159:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END154]] +// SIMD-ONLY0: cond.end154: +// SIMD-ONLY0-NEXT: [[COND155:%.*]] = phi double [ [[TMP158]], [[COND_TRUE152]] ], [ [[TMP159]], [[COND_FALSE153]] ] +// SIMD-ONLY0-NEXT: store double [[COND155]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP160:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP160]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP161:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP162:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP156:%.*]] = fcmp oeq double [[TMP161]], [[TMP162]] +// SIMD-ONLY0-NEXT: br i1 [[CMP156]], label [[COND_TRUE157:%.*]], label [[COND_FALSE158:%.*]] +// SIMD-ONLY0: cond.true157: +// SIMD-ONLY0-NEXT: [[TMP163:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159:%.*]] +// SIMD-ONLY0: cond.false158: +// SIMD-ONLY0-NEXT: [[TMP164:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END159]] +// SIMD-ONLY0: cond.end159: +// SIMD-ONLY0-NEXT: [[COND160:%.*]] = phi double [ [[TMP163]], [[COND_TRUE157]] ], [ [[TMP164]], [[COND_FALSE158]] ] +// SIMD-ONLY0-NEXT: store double [[COND160]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP165:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP166:%.*]] = load double, ptr [[DE]], align 8 +// 
SIMD-ONLY0-NEXT: [[CMP161:%.*]] = fcmp ogt double [[TMP165]], [[TMP166]] +// SIMD-ONLY0-NEXT: br i1 [[CMP161]], label [[COND_TRUE162:%.*]], label [[COND_FALSE163:%.*]] +// SIMD-ONLY0: cond.true162: +// SIMD-ONLY0-NEXT: [[TMP167:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164:%.*]] +// SIMD-ONLY0: cond.false163: +// SIMD-ONLY0-NEXT: [[TMP168:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END164]] +// SIMD-ONLY0: cond.end164: +// SIMD-ONLY0-NEXT: [[COND165:%.*]] = phi double [ [[TMP167]], [[COND_TRUE162]] ], [ [[TMP168]], [[COND_FALSE163]] ] +// SIMD-ONLY0-NEXT: store double [[COND165]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP169:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP169]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP170:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP171:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP166:%.*]] = fcmp olt double [[TMP170]], [[TMP171]] +// SIMD-ONLY0-NEXT: br i1 [[CMP166]], label [[COND_TRUE167:%.*]], label [[COND_FALSE168:%.*]] +// SIMD-ONLY0: cond.true167: +// SIMD-ONLY0-NEXT: [[TMP172:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169:%.*]] +// SIMD-ONLY0: cond.false168: +// SIMD-ONLY0-NEXT: [[TMP173:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END169]] +// SIMD-ONLY0: cond.end169: +// SIMD-ONLY0-NEXT: [[COND170:%.*]] = phi double [ [[TMP172]], [[COND_TRUE167]] ], [ [[TMP173]], [[COND_FALSE168]] ] +// SIMD-ONLY0-NEXT: store double [[COND170]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP174:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP174]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP175:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP176:%.*]] = load double, ptr [[DE]], align 8 +// SIMD-ONLY0-NEXT: [[CMP171:%.*]] = fcmp oeq double [[TMP175]], 
[[TMP176]] +// SIMD-ONLY0-NEXT: br i1 [[CMP171]], label [[COND_TRUE172:%.*]], label [[COND_FALSE173:%.*]] +// SIMD-ONLY0: cond.true172: +// SIMD-ONLY0-NEXT: [[TMP177:%.*]] = load double, ptr [[DD]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174:%.*]] +// SIMD-ONLY0: cond.false173: +// SIMD-ONLY0-NEXT: [[TMP178:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: br label [[COND_END174]] +// SIMD-ONLY0: cond.end174: +// SIMD-ONLY0-NEXT: [[COND175:%.*]] = phi double [ [[TMP177]], [[COND_TRUE172]] ], [ [[TMP178]], [[COND_FALSE173]] ] +// SIMD-ONLY0-NEXT: store double [[COND175]], ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: [[TMP179:%.*]] = load double, ptr [[DX]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP179]], ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: [[TMP180:%.*]] = load double, ptr [[DV]], align 8 +// SIMD-ONLY0-NEXT: ret double [[TMP180]] +// diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -84,6 +84,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -104,39 +105,46 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I24:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_36:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_37:%.*]] 
= alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_38:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[ARGV_ADDR]], ptr [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], 
label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -144,8 +152,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -158,28 +166,28 @@ // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_3]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_4]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_4]], ptr [[DOTOMP_SECTIONS_LB_1]], ptr [[DOTOMP_SECTIONS_UB_2]], ptr [[DOTOMP_SECTIONS_ST_3]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND6:%.*]] // CHECK1: omp.inner.for.cond6: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_2]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END18:%.*]] // CHECK1: omp.inner.for.body8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT15:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE9:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE12:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case9: -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT10:%.*]], label [[DOTCANCEL_CONTINUE11:%.*]] // CHECK1: .cancel.exit10: // CHECK1-NEXT: br label [[CANCEL_EXIT19:%.*]] // CHECK1: cancel.exit: @@ -188,9 +196,9 @@ // CHECK1: .cancel.continue11: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT15]] // CHECK1: .omp.sections.case12: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: 
[[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT13:%.*]], label [[DOTCANCEL_CONTINUE14:%.*]] // CHECK1: .cancel.exit13: // CHECK1-NEXT: br label [[CANCEL_EXIT19]] // CHECK1: .cancel.continue14: @@ -198,8 +206,8 @@ // CHECK1: .omp.sections.exit15: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] // CHECK1: omp.inner.for.inc16: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 -// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_5]], align 4 +// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[INC17]], ptr [[DOTOMP_SECTIONS_IV_5]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND6]] // CHECK1: omp.inner.for.end18: @@ -207,57 +215,57 @@ // CHECK1-NEXT: br label [[CANCEL_CONT20:%.*]] // CHECK1: cancel.cont20: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB5]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB22:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB22]], ptr [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP23:%.*]] = icmp slt i32 0, [[TMP27]] // CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 -// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[CMP25:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP25]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_21]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE]] ], [ [[TMP30]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26:%.*]] // CHECK1: omp.inner.for.cond26: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp sle i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_INNER_FOR_BODY28:%.*]], label [[OMP_INNER_FOR_END33:%.*]] // CHECK1: omp.inner.for.body28: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP36]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I24]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP35]], 0.000000e+00 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP37]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP36:%.*]] = call i32 
@__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTCANCEL_EXIT29:%.*]], label [[DOTCANCEL_CONTINUE30:%.*]] // CHECK1: .cancel.exit29: // CHECK1-NEXT: br label [[CANCEL_EXIT34:%.*]] // CHECK1: cancel.exit19: @@ -272,8 +280,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC31:%.*]] // CHECK1: omp.inner.for.inc31: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP40]], 1 // CHECK1-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND26]] // CHECK1: omp.inner.for.end33: @@ -288,72 +296,77 @@ // CHECK1-NEXT: br label [[CANCEL_CONT35]] // CHECK1: cancel.cont35: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP39:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP39]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP39]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[TMP41:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP41]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP41]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_36]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_37]]) // CHECK1-NEXT: store i32 0, ptr [[R]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[ARGC_ADDR]], ptr [[R]]) -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: ret i32 [[TMP43]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_38]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[R]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_38]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: ret i32 [[TMP46]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @flag, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @flag, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP4]], i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP7]], i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP11]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX1]], align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue3: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP20]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[TMP22]], i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV6]], [[TMP20]] // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV7]], ptr [[ARRAYIDX5]], align 1 // CHECK1-NEXT: br label [[RETURN]] @@ -391,11 +404,11 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP11]], i32 4) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP9]], i32 4) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: // CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -408,10 +421,11 @@ // // // CHECK1-LABEL: 
define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -419,34 +433,36 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label 
[[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -454,25 +470,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -480,43 +497,45 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 
[[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -524,27 +543,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -554,112 +572,114 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[R_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[R3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: store i32 0, ptr [[R]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP17]], i32 2) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// 
CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP20]], i32 2) +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[R3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[R]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[R3]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB7:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[R]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB7:[0-9]+]], i32 [[TMP30]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB7]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP32:%.*]] = load 
i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB7]], i32 [[TMP30]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB6]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP35]] monotonic, align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[R]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP36]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -677,15 +697,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// 
CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -715,7 +735,10 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I36:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_49:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[R:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_50:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -928,15 +951,19 @@ // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM46]]) // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM47:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14:[0-9]+]]) // CHECK3-NEXT: [[TMP36:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM47]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) 
-// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP36]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP36]], i32 0, i32 0 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM48:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB14]]) -// CHECK3-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], ptr [[TMP36]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM48]], ptr [[TMP36]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_49]]) // CHECK3-NEXT: store i32 0, ptr [[R]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..3, ptr [[ARGC_ADDR]], ptr [[R]]) -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: ret i32 [[TMP40]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP39]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_50]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[R]], ptr [[TMP40]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_50]]) +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK3-NEXT: ret i32 [[TMP41]] // // // CHECK3-LABEL: define {{[^@]+}}@main..omp_par @@ -1032,11 +1059,11 @@ // CHECK3-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12:[0-9]+]]) -// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK3: .cancel.exit.i: // CHECK3-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] @@ -1049,10 +1076,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1060,34 +1088,36 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB17:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB15:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 +// CHECK3-NEXT: store i32 
[[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label 
[[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: @@ -1095,8 +1125,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1112,10 +1142,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1123,44 +1154,46 @@ // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB21:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 1 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP6]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_SECTIONS_CASE_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE_CNCL:%.*]] // CHECK3: .omp.sections.case.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case.cncl: // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.sections.case2: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) -// CHECK3-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 3) +// CHECK3-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_SECTIONS_CASE2_SPLIT:%.*]], label [[DOTOMP_SECTIONS_CASE2_CNCL:%.*]] // CHECK3: 
.omp.sections.case2.split: // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_CASE2_SECTION_AFTER:%.*]] // CHECK3: .omp.sections.case2.section.after: @@ -1170,8 +1203,8 @@ // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1187,12 +1220,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[R:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1202,107 +1234,109 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[R3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[R:%.*]] 
= alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[R_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[R3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[R]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]) // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], i32 2) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 
[[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]) +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM6]], i32 2) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[R3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[R]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29:[0-9]+]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[R3]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM12:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29:[0-9]+]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[R]], ptr [[TMP22]], align 8 +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB30:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM11]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM12]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD12]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB30]], i32 [[OMP_GLOBAL_THREAD_NUM11]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB29]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[R3]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP25]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[R]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1320,14 +1354,14 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/cancellation_point_codegen.cpp b/clang/test/OpenMP/cancellation_point_codegen.cpp --- a/clang/test/OpenMP/cancellation_point_codegen.cpp +++ b/clang/test/OpenMP/cancellation_point_codegen.cpp @@ -86,6 +86,7 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -106,45 +107,52 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I28:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], 
align 1 +// CHECK1-NEXT: [[AGG_CAPTURED42:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_43:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_44:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_45:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[ARGV_ADDR]], ptr [[ARGC_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br 
i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP7]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -152,8 +160,8 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 
[[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -166,28 +174,28 @@ // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_5]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_6]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_SECTIONS_IL_6]], ptr [[DOTOMP_SECTIONS_LB_3]], ptr [[DOTOMP_SECTIONS_UB_4]], ptr [[DOTOMP_SECTIONS_ST_5]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 -// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_3]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK1: omp.inner.for.cond8: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] 
= load i32, ptr [[DOTOMP_SECTIONS_UB_4]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END22:%.*]] // CHECK1: omp.inner.for.body10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT19:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE11:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE14:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case11: -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT12:%.*]], label [[DOTCANCEL_CONTINUE13:%.*]] // CHECK1: .cancel.exit12: // CHECK1-NEXT: br label [[CANCEL_EXIT23:%.*]] // CHECK1: cancel.exit: @@ -196,15 +204,15 @@ // CHECK1: .cancel.continue13: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT19]] // CHECK1: .omp.sections.case14: -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT15:%.*]], label [[DOTCANCEL_CONTINUE16:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT15:%.*]], 
label [[DOTCANCEL_CONTINUE16:%.*]] // CHECK1: .cancel.exit15: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue16: -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 3) +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTCANCEL_EXIT17:%.*]], label [[DOTCANCEL_CONTINUE18:%.*]] // CHECK1: .cancel.exit17: // CHECK1-NEXT: br label [[CANCEL_EXIT23]] // CHECK1: .cancel.continue18: @@ -212,8 +220,8 @@ // CHECK1: .omp.sections.exit19: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] // CHECK1: omp.inner.for.inc20: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 -// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_7]], align 4 +// CHECK1-NEXT: [[INC21:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[INC21]], ptr [[DOTOMP_SECTIONS_IV_7]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND8]] // CHECK1: omp.inner.for.end22: @@ -221,62 +229,62 @@ // CHECK1-NEXT: br label [[CANCEL_CONT24:%.*]] // CHECK1: cancel.cont24: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB26:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB26]], ptr [[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP27:%.*]] = icmp slt i32 0, [[TMP31]] // CHECK1-NEXT: br i1 [[CMP27]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 -// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[CMP29:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: br i1 [[CMP29]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_25]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_25]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP33]], [[COND_TRUE]] ], [ [[TMP34]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP35]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] // CHECK1: omp.inner.for.cond30: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP31:%.*]] = icmp sle i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END39:%.*]] // CHECK1: omp.inner.for.body32: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP38]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP40]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I28]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 -// 
CHECK1-NEXT: br i1 [[TMP40]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT33:%.*]], label [[DOTCANCEL_CONTINUE34:%.*]] // CHECK1: .cancel.exit33: // CHECK1-NEXT: br label [[CANCEL_EXIT40:%.*]] // CHECK1: cancel.exit23: // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[CANCEL_CONT24]] // CHECK1: .cancel.continue34: -// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) -// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 -// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP0]], i32 2) +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTCANCEL_EXIT35:%.*]], label [[DOTCANCEL_CONTINUE36:%.*]] // CHECK1: .cancel.exit35: // CHECK1-NEXT: br label [[CANCEL_EXIT40]] // CHECK1: .cancel.continue36: @@ -284,8 +292,8 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC37:%.*]] // CHECK1: omp.inner.for.inc37: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD38]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND30]] // CHECK1: omp.inner.for.end39: @@ -300,54 +308,57 @@ // CHECK1-NEXT: br label [[CANCEL_CONT41]] // CHECK1: cancel.cont41: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP44:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP44]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP44]]) -// CHECK1-NEXT: [[TMP48:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3) -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP48]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP48]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..5) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[ARGC_ADDR]]) -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: ret i32 [[TMP52]] +// CHECK1-NEXT: [[TMP46:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP46]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP46]]) +// CHECK1-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP49]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_43]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_44]]) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_45]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_45]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: ret i32 [[TMP53]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP3]], i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP6]], i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: -// 
CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: br label [[RETURN]] // CHECK1: .cancel.continue2: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP10]] to i8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP13]] to i8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 0 // CHECK1-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX3]], align 1 // CHECK1-NEXT: br label [[RETURN]] // CHECK1: return: @@ -384,18 +395,18 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP11]], i32 4) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP9]], i32 4) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: // CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: .cancel.continue.i: -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP11]], i32 4) -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT1_I:%.*]], label [[DOTCANCEL_CONTINUE2_I:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP9]], i32 4) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1_I:%.*]], label [[DOTCANCEL_CONTINUE2_I:%.*]] // CHECK1: .cancel.exit1.i: // CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] @@ -423,7 +434,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP6]], align 8 @@ -437,11 +448,11 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP11]], i32 4) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP9]], i32 4) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: // CHECK1-NEXT: store i32 1, ptr [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT:%.*]] @@ -454,10 +465,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // 
CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -465,40 +477,42 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: 
.cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: @@ -506,25 +520,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -532,49 +547,51 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// 
CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT1:%.*]], label [[DOTCANCEL_CONTINUE2:%.*]] // CHECK1: .cancel.exit1: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue2: // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case3: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT4:%.*]], label [[DOTCANCEL_CONTINUE5:%.*]] // CHECK1: .cancel.exit4: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue5: @@ -582,26 +599,26 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -614,67 +631,69 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB5]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP16]], i32 2) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB1]], i32 [[TMP18]], i32 2) +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] 
// CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP22]], i32 2) +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTCANCEL_EXIT6:%.*]], label [[DOTCANCEL_CONTINUE7:%.*]] // CHECK1: .cancel.exit6: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue7: @@ -682,21 +701,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP25]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: cancel.exit: // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 // CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP27]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: cancel.exit: +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -15,45 +15,37 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, ptr [[B]], align 4, !dbg [[DBG14:![0-9]+]] -// CHECK1-NEXT: store { float, float } [[TMP0]], ptr [[B_CASTED]], align 4, !dbg [[DBG14]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG14]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]), !dbg [[DBG14]] -// CHECK1-NEXT: ret void, !dbg [[DBG15:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0, !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1, !dbg [[DBG15]] +// CHECK1-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4, !dbg [[DBG15]] +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr 
[[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_REAL]], ptr [[DOTREALP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: store float [[B_IMAG]], ptr [[DOTIMAGP]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG14]] +// CHECK1-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG18:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: 
call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] -// CHECK1-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B_ADDR]], align 4, !dbg [[DBG38]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(ptr [[TMP0]], ptr [[TMP1]], <2 x float> [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] -// CHECK1-NEXT: ret void, !dbg [[DBG38]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META32:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK1-NEXT: [[TMP2:%.*]] = load { float, float }, ptr [[TMP1]], align 4, !dbg [[DBG34]] +// CHECK1-NEXT: store { float, float } [[TMP2]], ptr [[B]], align 4, !dbg [[DBG34]] +// CHECK1-NEXT: ret void, !dbg [[DBG35:![0-9]+]] // diff --git a/clang/test/OpenMP/debug-info-openmp-array.cpp b/clang/test/OpenMP/debug-info-openmp-array.cpp --- a/clang/test/OpenMP/debug-info-openmp-array.cpp +++ b/clang/test/OpenMP/debug-info-openmp-array.cpp @@ -20,31 +20,36 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[M_ADDR]], align 4, !dbg [[DBG15:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG16:![0-9]+]] -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG16]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG16]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG16]] -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG16]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[M_ADDR]], i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG25:![0-9]+]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG26:![0-9]+]] -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP3]]), !dbg [[DBG26]] -// CHECK1-NEXT: ret void, !dbg [[DBG26]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[M_ADDR]], align 4, !dbg [[DBG16:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG17:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG17]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG17]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG17]] +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG17]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG26:![0-9]+]] +// CHECK1-NEXT: store ptr [[M_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG26]] +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG26]] +// 
CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8, !dbg [[DBG26]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG26]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP6]]), !dbg [[DBG27]] +// CHECK1-NEXT: ret void, !dbg [[DBG27]] // // -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG27:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG28:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -56,123 +61,93 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36:![0-9]+]] +// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG39:![0-9]+]] -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store ptr [[CEN]], ptr [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG46:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_1]], metadata [[META45]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw 
i32 [[TMP4]], 0, !dbg [[DBG43]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG48:![0-9]+]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]], !dbg [[DBG43]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_1]], metadata [[META45]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0, !dbg [[DBG47:![0-9]+]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG46]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]], !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG50:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG50]] -// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I3]], metadata [[META47]], metadata !DIExpression()), !dbg [[DBG36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG54:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]], !dbg [[DBG50]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG50]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META54:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I3]], metadata [[META48]], metadata !DIExpression()), !dbg [[DBG40]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]], !dbg [[DBG51]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG51]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG50]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG51]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG51]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG50]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg 
[[DBG50]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ], !dbg [[DBG51]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]], !dbg [[DBG47]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG48]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG55:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]], !dbg [[DBG58]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG59:![0-9]+]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP20]], 1, !dbg [[DBG49]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG49]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !dbg [[DBG49]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !dbg [[DBG57:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG58:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]], !dbg [[DBG58]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !dbg [[DBG59:![0-9]+]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG60:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG54]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG50]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1, !dbg [[DBG43]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG54]], !llvm.loop [[LOOP61:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG51]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG47]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG47]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG54]] +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG43]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG54]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !dbg [[DBG54]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB3:[0-9]+]], i32 [[TMP21]]), !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG54]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !dbg [[DBG43]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP25]]), !dbg [[DBG62:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG43]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // -// -// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3]] !dbg [[DBG64:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: store ptr [[CEN]], ptr [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG71:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG71]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]], i64 [[TMP1]], ptr [[TMP6]]) #[[ATTR4:[0-9]+]], !dbg [[DBG71]] -// CHECK1-NEXT: ret void, !dbg [[DBG71]] -// -// \ No newline at end of file diff --git a/clang/test/OpenMP/debug_threadprivate_copyin.c b/clang/test/OpenMP/debug_threadprivate_copyin.c --- a/clang/test/OpenMP/debug_threadprivate_copyin.c +++ b/clang/test/OpenMP/debug_threadprivate_copyin.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // This testcase checks emission of debug info for threadprivate variables // present in any clause of OpenMP construct. 
@@ -6,13 +7,6 @@ // RUN: %clang_cc1 -debug-info-kind=constructor -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s // expected-no-diagnostics -// CHECK: define internal void @.omp_outlined._debug__( -// CHECK: call void @llvm.dbg.declare(metadata ptr %.global_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %.bound_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %nt.addr -// CHECK: store ptr %gbl_dynamic_int, ptr %gbl_dynamic_int.addr, align 8 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_dynamic_int.addr -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_static_int.addr extern int printf(const char *, ...); extern void omp_set_num_threads(int); @@ -24,6 +18,33 @@ #pragma omp threadprivate(gbl_dynamic_int) +// CHECK-LABEL: @main( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[NT:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OFFSET:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[NT]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK-NEXT: store i32 0, ptr [[NT]], align 4, !dbg [[DBG24]] +// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[OFFSET]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] +// CHECK-NEXT: store i32 10, ptr [[OFFSET]], align 4, !dbg [[DBG26]] +// CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_dynamic_int), !dbg [[DBG27:![0-9]+]] +// CHECK-NEXT: store i32 55, ptr [[TMP0]], align 4, !dbg [[DBG28:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_static_int), !dbg [[DBG29:![0-9]+]] +// CHECK-NEXT: store i32 77, ptr [[TMP1]], align 4, !dbg [[DBG30:![0-9]+]] +// CHECK-NEXT: 
call void @omp_set_num_threads(i32 noundef 4), !dbg [[DBG31:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG32:![0-9]+]] +// CHECK-NEXT: store ptr [[NT]], ptr [[TMP2]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_dynamic_int), !dbg [[DBG33:![0-9]+]] +// CHECK-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG32]] +// CHECK-NEXT: [[TMP6:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @gbl_static_int), !dbg [[DBG35:![0-9]+]] +// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8, !dbg [[DBG32]] +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG32]] +// CHECK-NEXT: ret i32 0, !dbg [[DBG36:![0-9]+]] +// int main() { int nt = 0; int offset = 10; diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -28,7 +28,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -36,9 +37,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -46,20 +52,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR6]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -77,7 +85,7 @@ // CHECK1-SAME: () #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR7]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) // CHECK1-NEXT: ret i32 [[CALL]] // diff 
--git a/clang/test/OpenMP/declare_variant_construct_codegen_1.c b/clang/test/OpenMP/declare_variant_construct_codegen_1.c --- a/clang/test/OpenMP/declare_variant_construct_codegen_1.c +++ b/clang/test/OpenMP/declare_variant_construct_codegen_1.c @@ -72,13 +72,13 @@ { vxv(v1, v2, v3, N); } -// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[PARALLEL_REGION:@.+]] to void +// CK1: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[PARALLEL_REGION:@.+]] to void return 0; } // CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}}) -// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, [100 x i32]*, [100 x i32]*, [100 x i32]*)* [[TARGET_REGION:@.+]] to void +// CK1: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[TARGET_REGION:@.+]] to void // CK1: define internal void [[TARGET_REGION]]( // CK1: call void @t_vxv @@ -167,11 +167,11 @@ { test_base(v1, v2, v3, 0); } -// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[PARALLEL_REGION:@.+]] to void +// CK2: call void ({{.+}}) @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon{{[\.0-9]*}}*)* [[PARALLEL_REGION:@.+]] to void } // CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}}) -// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 3, void ({{.+}})* bitcast (void (i32*, i32*, i32****, i32****, i32****)* [[TARGET_REGION_1:@.+]] to void +// CK2: call void ({{.+}}) @__kmpc_fork_teams(%struct.ident_t* {{.+}}, i32 1, void ({{.+}})* bitcast (void (i32*, i32*, 
%struct.anon{{[\.0-9]*}}*)* [[TARGET_REGION_1:@.+]] to void // CK2: define internal void [[TARGET_REGION_1]]( // CK2: call void @test_teams diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -137,52 +137,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], 
i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], 
ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store 
ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -197,23 +197,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -223,79 +229,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// 
CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -320,52 +328,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 
8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store 
ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// 
CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 
+// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -380,23 +388,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -406,79 +420,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -503,52 +519,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr 
[[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds 
[4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 16908289, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 16908289, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -563,23 +579,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -589,96 +611,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: 
[[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] 
+// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -699,47 +723,47 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[A]], align 1 -// CHECK1-NEXT: store i8 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP10]] to i32 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV2]] -// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], 
i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK1-NEXT: store i8 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[ADD5]] to i64 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[ADD4]] to i64 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP14]], align 4 
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 
1, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -751,17 +775,20 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -774,59 +801,61 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] 
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK1-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -834,16 +863,16 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -871,34 +900,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, 
i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -910,17 +939,20 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -930,70 +962,72 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // @@ -1025,52 +1059,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr 
inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 4571424, ptr [[TMP26]], 
align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1085,23 +1119,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1111,75 +1151,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// 
CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr 
[[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1204,52 +1246,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr 
[[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr 
[[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1264,23 +1306,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1290,75 +1338,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// 
CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1383,52 +1433,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// 
CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr 
[[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr 
[[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 16908289, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 
+// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 16908289, ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1443,23 +1493,29 @@ 
// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1469,92 +1525,94 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[TMP0]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 
[[TMP29]] +// CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1575,47 +1633,47 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[A]], align 1 -// CHECK3-NEXT: store i8 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP10]] to i32 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV2]] -// CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[A]], align 1 +// CHECK3-NEXT: 
store i8 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[ADD5]] to i64 +// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[ADD4]] to i64 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr 
@.offload_sizes.8, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 [[TMP12]], ptr [[TMP21]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 
+// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 [[TMP10]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1627,17 +1685,20 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[A_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -1650,59 +1711,61 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK3-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -1710,16 +1773,16 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1747,34 +1810,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 
[[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// 
CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1786,17 +1849,20 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1806,70 +1872,72 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: ret void // // @@ -1887,23 +1955,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1913,79 +1987,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br 
label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load 
float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -1996,23 +2072,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], 
align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2022,79 +2104,81 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK17-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2105,23 +2189,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 
// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2131,96 +2221,98 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 
16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// 
CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: 
[[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2228,17 +2320,20 @@ // CHECK17-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[A_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2251,59 +2346,61 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK17-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK17-NEXT: 
store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK17-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -2311,16 +2408,16 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// 
CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -2330,17 +2427,20 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2350,70 +2450,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] 
to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK17-NEXT: ret void // // 
@@ -2424,23 +2526,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2450,75 +2558,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br 
label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 
[[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// 
CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2529,23 +2639,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2555,75 +2671,77 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // 
CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2634,23 +2752,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2660,92 +2784,94 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 
16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !12 -// 
CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] +// CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // 
CHECK19-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2753,17 +2879,20 @@ // CHECK19-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[A_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 @@ -2776,59 +2905,61 @@ // CHECK19-NEXT: 
[[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK19-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK19-NEXT: store i8 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], ptr [[I]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP17]] to i32 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 // CHECK19-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1 @@ -2836,16 +2967,16 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -2855,17 +2986,20 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2875,69 +3009,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // 
CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // 
CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK19-NEXT: ret void // diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp @@ -176,25 +176,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -203,104 +209,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], 
align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: 
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -340,6 +348,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -353,20 +362,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -375,104 +389,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], 
align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: 
-// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // @@ -629,6 +645,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -638,21 +655,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -661,117 +684,119 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], 
label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP20]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP30]]) +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -949,6 +974,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -957,20 +983,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -979,112 +1010,114 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], 
ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] 
+// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1285,6 +1318,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1294,21 +1328,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1317,115 +1357,117 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP21]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP20]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: call 
void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1603,6 +1645,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1611,20 +1654,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1633,110 +1681,112 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], 
align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], 
[[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp @@ -166,25 +166,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -193,99 +199,101 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP29]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store 
float [[TMP32]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -327,6 +335,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -340,20 +349,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -362,99 +376,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: 
[[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[TMP29]], align 4 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 
4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP32]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -614,6 +630,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -623,21 +640,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -646,33 +669,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// 
CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -682,99 +707,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void 
@_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 
[[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP32]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -952,6 +977,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -960,20 +986,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -982,30 +1013,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], 
ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1015,97 +1048,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 
-// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr 
[[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP30]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1307,6 +1340,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1316,21 +1350,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1339,33 +1379,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1375,97 +1417,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// 
CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 
+// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], 
i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP23]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], 
i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP32]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: 
[[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1643,6 +1685,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1651,20 +1694,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1673,30 +1721,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], 
align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1706,95 +1756,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: 
omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP29]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP30]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -801,23 +801,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -827,93 +833,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -924,110 +944,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 
[[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // 
CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// 
CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr 
[[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1040,23 +1064,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1066,93 +1096,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // 
CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1163,110 +1207,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load 
ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = 
load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, 
i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: 
omp.precond.end: // CHECK1-NEXT: ret void @@ -1280,25 +1328,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1308,120 +1363,134 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], 
[[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1432,110 +1501,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca 
[[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// 
CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, 
i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1548,23 +1621,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1574,93 +1653,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { 
// CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1671,110 +1764,114 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store 
i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, 
ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br 
i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], 
ptr [[TMP40]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1788,25 +1885,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1817,102 +1921,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1923,131 +2040,137 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: 
cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr 
[[TMP8]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret 
void @@ -2060,23 +2183,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2086,93 +2215,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] 
to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { 
// CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2183,99 +2326,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 
[[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = 
load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP12]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr 
[[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: 
store ptr [[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2296,25 +2443,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2325,102 +2479,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2431,101 +2598,107 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 
[[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] 
= sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 8, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2579,23 +2752,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2605,91 +2784,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // 
CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2700,105 +2893,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 
[[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], 
i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// 
CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -2811,23 +3008,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2837,91 +3040,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// 
CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2932,105 +3149,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: 
store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, 
ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// 
CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3044,25 +3265,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3072,118 +3300,132 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], 
[[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr 
[[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK3-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3194,105 +3436,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: 
[[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr 
[[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3305,23 +3551,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3331,91 +3583,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { 
// CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3426,105 +3692,109 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store 
i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// 
CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3538,25 +3808,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3567,100 +3844,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr 
[[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3671,124 +3961,130 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, ptr 
[[ARRAYIDX8]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: 
[[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK3-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) // CHECK3-NEXT: br label 
[[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3801,23 +4097,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3827,91 +4129,105 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { 
// CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3922,94 +4238,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 
[[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 
[[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 
[[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK3-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP39:%.*]] = 
getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4030,25 +4350,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4059,100 +4386,113 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr 
[[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4163,96 +4503,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: 
[[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds 
[[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4850,23 +5196,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4876,93 +5228,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // 
CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4975,98 +5341,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr 
[[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5079,23 +5449,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5105,93 +5481,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 
8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // 
CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5204,98 +5594,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr 
[[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5309,25 +5703,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], 
ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5337,120 +5738,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], 
[[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5463,98 +5878,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd 
double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5567,23 +5986,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5593,93 +6018,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 
+// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { 
// CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5692,98 +6131,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr 
[[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -5797,25 +6240,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], 
ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -5826,102 +6276,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5934,119 +6397,125 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 
[[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store double 
[[ADD13]], ptr [[ARRAYIDX15]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -6059,23 +6528,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6085,93 +6560,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { 
// CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6184,87 +6673,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to 
i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6285,25 +6778,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] 
= alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6314,102 +6814,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6422,89 +6935,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // 
CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load 
double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -7092,23 +7611,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7118,93 +7643,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // 
CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7217,111 +7756,115 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 
-// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 
-// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 
[[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: cancel.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: omp.precond.end: // CHECK9-NEXT: br label [[CANCEL_CONT]] @@ -7336,23 +7879,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7362,93 +7911,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] 
to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { 
// CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7461,98 +8024,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// 
CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7566,25 +8133,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // 
CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7594,120 +8168,134 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 
4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], 
[[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], 
i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 
+// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7720,98 +8308,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 
-// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr 
[[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 
[[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: 
omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -7824,23 +8416,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7850,93 +8448,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // 
CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7949,98 +8561,102 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 
-// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr 
inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8054,25 +8670,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store 
ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8083,102 +8706,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], 
i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8191,119 +8827,125 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // 
CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store i32 [[ADD13]], ptr 
[[ARRAYIDX15]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -8316,23 +8958,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8342,93 +8990,107 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] 
to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { 
// CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8441,87 +9103,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 
-// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -8542,25 +9208,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8571,102 +9244,115 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8679,89 +9365,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // 
CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9359,23 +10051,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9385,91 +10083,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], 
ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, 
[[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9482,93 +10194,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = 
getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9581,23 +10297,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9607,91 +10329,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], 
align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // 
CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9704,93 +10440,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store 
ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 
[[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
-// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: 
[[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -9804,25 +10544,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9832,118 +10579,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 
4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], 
[[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: 
store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9956,93 +10717,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store 
i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load 
i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10055,23 +10820,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10081,91 +10852,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10178,93 +10963,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr 
inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10278,25 +11067,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10307,100 +11103,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load 
ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10413,112 +11222,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load 
i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -10531,23 +11346,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10557,91 +11378,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10654,82 +11489,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: 
omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 
4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -10750,25 +11589,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 
// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -10779,100 +11625,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10885,84 +11744,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd 
double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11550,23 +12415,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11576,91 +12447,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11673,106 +12558,110 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP21]], i32 2) -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP30]], i32 2) +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP29]] -// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: 
[[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: cancel.exit: -// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: omp.precond.end: // CHECK11-NEXT: br label [[CANCEL_CONT]] @@ -11787,23 +12676,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11813,91 +12708,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr 
[[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // 
// // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11910,93 +12819,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: 
store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub 
nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: 
[[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12010,25 +12923,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12038,118 +12958,132 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 
4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: 
[[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] 
// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12162,93 +13096,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr 
[[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12261,23 +13199,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12287,91 +13231,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12384,93 +13342,97 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr 
inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12484,25 +13446,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12513,100 +13482,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12619,112 +13601,118 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, 
ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX10]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: 
// CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -12737,23 +13725,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: 
store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12763,91 +13757,105 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12860,82 +13868,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: 
omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group 
[[ACC_GRP26]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -12956,25 +13968,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -12985,100 +14004,113 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13091,84 +14123,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 
[[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP29]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -289,25 +289,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -316,109 +322,115 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store double [[TMP21]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store double [[TMP24]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store double [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: store double [[TMP32]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 
+// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca 
i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -427,80 +439,92 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, 
ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP23]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 
+// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -540,6 +564,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // 
CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -553,20 +578,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -575,103 +605,111 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// 
CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4 -// CHECK3-NEXT: store double [[TMP20]], ptr [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store double [[TMP29]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// 
CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -679,83 +717,93 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP12]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store 
ptr [[SFVAR_ADDR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // @@ -912,6 +960,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -921,21 +970,27 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK8-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -944,131 +999,142 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// 
CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], 
i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] 
], [ [[TMP19]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: store i32 [[TMP21]], ptr [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR9]], align 4 -// CHECK8-NEXT: store i32 [[TMP24]], ptr [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP25:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK8-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[VEC4]], i64 [[TMP22]], ptr [[S_ARR5]], ptr [[TMP23]], i64 [[TMP25]]) +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK8-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK8-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP29]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP32]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 
+// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK8-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP41:%.*]] 
= getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done12: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1076,115 +1142,123 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], 
align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], 
align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 
[[TMP18]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK8-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 
2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1202,10 +1276,10 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK8-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1216,9 +1290,9 @@ // CHECK8-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, ptr [[T_VAR]], align 4 // 
CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK8-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1282,12 +1356,12 @@ // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1362,6 +1436,7 @@ // CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1370,20 +1445,25 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1392,121 +1472,133 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label 
[[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: store i32 [[TMP19]], ptr 
[[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[VEC4]], i64 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]]) +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK8-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK8-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// 
CHECK8-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], 
i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1514,114 +1606,120 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr 
[[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], 
align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr 
[[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // 
CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1641,7 +1739,7 @@ // CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, ptr [[F]], align 4 // CHECK8-NEXT: ret void // @@ -1654,7 +1752,7 @@ // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], 
i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK8-NEXT: ret void @@ -1822,6 +1920,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1831,21 +1930,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1854,129 +1959,140 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK10-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK10-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR9]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[VEC4]], i32 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]], i32 [[TMP23]]) +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK10-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP34]], ptr [[TMP33]], 
align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br 
label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 
-// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1984,111 +2100,119 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr 
[[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// 
CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK10-NEXT: store ptr 
[[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 
[[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP30]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK10-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2106,10 +2230,10 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK10-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2120,9 +2244,9 @@ // CHECK10-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr 
inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK10-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2186,12 +2310,12 @@ // CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK10-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2266,6 +2390,7 @@ // CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2274,20 +2399,25 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2296,119 +2426,131 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = 
phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: 
omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP17]], ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[VEC4]], i32 [[TMP18]], ptr [[S_ARR5]], ptr [[TMP19]]) +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK10-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: 
arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2416,110 +2558,116 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// 
CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK10-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], 
ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2539,7 +2687,7 @@ // CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, ptr [[F]], align 4 // CHECK10-NEXT: ret void // @@ -2552,7 +2700,7 @@ // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // 
CHECK10-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -183,77 +183,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -263,139 +275,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// 
CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -405,43 +433,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -449,14 +481,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // 
CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -529,117 +561,129 @@ // CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK1-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l103(i64 [[TMP23]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] // CHECK1: 
omp_offload.cont8: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP42]]) +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP40]]) // CHECK1-NEXT: ret i32 [[CALL]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -649,43 +693,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -693,96 +741,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: 
[[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -792,43 +852,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -836,14 +900,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -851,95 +915,108 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -949,43 +1026,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -993,14 +1074,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1073,34 +1154,34 @@ // CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP32]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK1-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l75(i64 [[TMP23]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] @@ -1111,77 
+1192,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l63 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: 
define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1191,43 +1284,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1235,96 +1332,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l69 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // 
CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1334,43 +1443,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to 
i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1378,14 +1491,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1393,95 +1506,108 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1491,43 +1617,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1535,14 +1665,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp @@ -230,25 +230,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -257,100 +263,114 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[G3]], ptr [[TMP17]], ptr [[SVAR6]], ptr [[SFVAR7]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load 
float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -358,105 +378,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr 
[[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: 
[[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -498,6 +522,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -511,20 +536,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -533,98 +563,112 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = 
alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// 
CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: 
cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[G3]], ptr [[TMP15]], ptr [[SVAR6]], ptr [[SFVAR7]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load 
float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP35]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -632,103 +676,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// 
CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], 
i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // 
CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -888,6 +936,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -897,21 +946,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -920,33 +975,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca 
i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -956,105 +1016,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 
4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP18]], ptr [[SVAR8]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP40]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP41]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1062,39 +1132,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr 
[[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1104,99 +1178,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// 
CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) 
#[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1214,10 +1288,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1228,9 +1302,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1294,12 +1368,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1374,6 +1448,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1382,20 +1457,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1404,135 +1484,149 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: 
[[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], 
ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1540,136 +1634,140 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 
false) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: br label 
[[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ 
-1689,7 +1787,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1702,7 +1800,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1871,6 +1969,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1880,21 +1979,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1903,33 +2008,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1939,103 +2049,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP16]], ptr [[SVAR8]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 
4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP38]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP39]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2043,37 +2163,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: 
store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2083,97 +2207,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 
-// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
[2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] 
= icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 
0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2191,10 +2315,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// 
CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2205,9 +2329,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2271,12 +2395,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 
2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2351,6 +2475,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2359,20 +2484,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2381,133 +2511,147 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label 
[[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// 
CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 
4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// 
CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = 
getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2515,132 +2659,136 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: 
store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2660,7 +2808,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2673,7 +2821,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -236,78 +236,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -317,43 
+329,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -363,20 +379,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -391,84 +407,97 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: 
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // 
CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -478,43 +507,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -524,20 +557,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store 
i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // @@ -697,78 +730,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -778,43 
+823,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -824,98 +873,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () 
#[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -925,43 
+986,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -971,98 +1036,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () 
#[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1072,43 
+1149,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1118,70 +1199,77 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () 
#[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -1190,41 +1278,46 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: 
-// CHECK1-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1234,43 +1327,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1280,20 +1377,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK1-NEXT: unreachable // // @@ -1447,78 +1544,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1528,43 
+1637,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1574,20 +1687,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1602,84 +1715,97 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: 
entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // 
CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1689,43 +1815,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// 
CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1735,20 +1865,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store 
i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -1899,78 +2029,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1980,43 
+2122,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2026,98 +2172,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () 
#[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2127,43 
+2285,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2173,98 +2335,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: 
() #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2274,43 
+2448,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2320,70 +2498,77 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () 
#[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -2392,41 +2577,46 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: 
-// CHECK5-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2436,43 +2626,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2482,20 +2676,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK5-NEXT: unreachable // // @@ -2658,78 +2852,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2739,43 
+2945,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2785,20 +2995,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] // CHECK9-NEXT: unreachable // // @@ -2813,84 +3023,97 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: 
entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // 
CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2900,43 +3123,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -2946,20 +3173,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store 
i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -3119,78 +3346,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
+// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3200,43 
+3439,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3246,98 +3489,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () 
#[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3347,43 
+3602,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3393,98 +3652,110 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: 
() #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3494,43 
+3765,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3540,70 +3815,77 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () 
#[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -3612,41 +3894,46 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK9-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: 
-// CHECK9-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3656,43 +3943,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: invoke void @_Z3foov() @@ -3702,20 +3993,20 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK9-NEXT: unreachable // // @@ -3869,78 +4160,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], 
ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -3950,43 +4253,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -3996,20 +4303,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10:[0-9]+]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10:[0-9]+]] 
// CHECK13-NEXT: unreachable // // @@ -4024,84 +4331,97 @@ // CHECK13-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, ptr 
[[TMP2]], align 1 +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -4111,43 +4431,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4157,20 +4481,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: 
unreachable // // @@ -4321,78 +4645,90 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -4402,43 +4738,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4448,98 +4788,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: 
unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// 
CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -4549,43 +4901,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4595,98 +4951,110 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: 
unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// 
CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i32, align 4 @@ -4696,43 +5064,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4742,70 +5114,77 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: 
unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // 
CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) @@ -4814,41 +5193,46 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke noundef i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]] // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]) +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]) // CHECK13-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: 
[[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP14:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP17:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP15:%.*]] = extractvalue { ptr, i32 } [[TMP14]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP15]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP18:%.*]] = extractvalue { ptr, i32 } [[TMP17]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP18]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr 
@__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4858,43 +5242,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: 
store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK13-NEXT: invoke void @_Z3foov() @@ -4904,20 +5292,20 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // 
CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR10]] +// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]] // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -167,69 +169,79 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -243,77 +255,81 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // 
CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr 
[[SFVAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -340,15 +356,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) 
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -356,67 +374,77 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -430,75 +458,79 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -534,27 +566,27 @@ // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr 
[[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -562,18 +594,18 @@ // CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -602,15 +634,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -618,6 +652,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -625,8 +661,11 @@ // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -644,55 +683,61 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void 
@_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -702,12 +747,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -724,15 +768,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: 
store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -749,65 +797,65 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], 
align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -830,62 +878,62 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr 
noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: 
call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -948,15 +996,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -964,81 +1014,92 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 
4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: 
[[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1047,12 +1108,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { 
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1062,97 +1122,101 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr 
[[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x 
%struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1176,7 +1240,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1189,7 +1253,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds 
[[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1236,27 +1300,27 @@ // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr 
null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1264,18 +1328,18 @@ // CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1304,15 +1368,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1320,6 +1386,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1327,8 +1395,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -1346,53 +1417,59 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label 
[[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1402,12 +1479,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1424,15 +1500,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // 
CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1447,63 +1527,63 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// 
CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1526,62 +1606,62 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr 
inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1644,15 +1724,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1660,79 +1742,90 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: 
cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1741,12 +1834,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1756,93 +1848,97 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 
// CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label 
[[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], 
ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], 
i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1866,7 +1962,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1879,7 +1975,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp @@ -119,78 +119,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -200,135 +212,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], 
[[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -338,57 +366,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -428,78 +460,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -509,57 +553,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -57,358 +57,378 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr 
[[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP6]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, i64 [[TMP7]], i64 [[TMP8]], ptr [[ARGC_ADDR]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, 
align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, 
ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr 
[[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] 
-// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr 
null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// 
CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..3, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP54]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP56]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 
[[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..2, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..3, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 // CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 
[[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB2]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB2]], i32 [[TMP81]], ptr [[TMP71]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB2]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB2]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP85]]) -// CHECK1-NEXT: 
[[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr // CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 // CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: 
[[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], 
align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], 
i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 // CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] 
to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -419,8 +439,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 
8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -431,12 +451,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -531,59 +551,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = 
load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], 
[[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: 
[[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -595,38 +615,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 
[[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc 
i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -801,23 +801,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -827,83 +833,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// 
CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP31:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -916,16 +941,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -936,116 +956,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store 
i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr 
[[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1064,23 +1088,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1090,83 +1120,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 
+// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1179,16 +1228,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1199,116 +1243,120 @@ // CHECK1-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: 
store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr 
[[TMP2]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = 
sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds 
[[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1328,25 +1376,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1356,110 +1411,129 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1472,16 +1546,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1492,116 +1561,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: 
[[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: 
[[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// 
CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1620,23 +1693,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1646,83 +1725,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// 
CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1735,16 +1833,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1755,116 +1848,120 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] 
= trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP34]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, 
!llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 8, 
!llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -1884,25 +1981,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -1913,91 +2017,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2010,17 +2132,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2031,137 +2148,143 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 
0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group 
[[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] 
+// CHECK1-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 8, 
!llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 8, !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 
4 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP55]], 0 // CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -2180,23 +2303,29 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: 
store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2206,83 +2335,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] 
-// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2295,16 +2443,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2315,99 +2458,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: 
[[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK1-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP39:%.*]] = 
getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I4]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2415,12 +2562,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -2440,25 +2587,32 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK1-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2469,91 +2623,109 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK1-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 5 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -2566,17 +2738,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2587,101 +2754,107 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_20:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 
0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = 
icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp 
sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP52]] -// 
CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 [[IDXPROM10]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr 
[[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I5]], ptr [[TMP42]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_20]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -2689,12 +2862,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -2747,23 +2920,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2773,81 +2952,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: 
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2860,16 +3058,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2880,111 +3073,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr 
[[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = 
getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // 
CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3003,23 +3200,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3029,81 +3232,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 
4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3116,16 +3338,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, 
align 4 @@ -3136,111 +3353,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group 
[[ACC_GRP23]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 
4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(ptr noundef nonnull 
align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3260,25 +3481,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3288,108 +3516,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// 
CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[COND_END12:%.*]] 
// CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: 
[[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -3402,16 +3649,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 
4 @@ -3422,111 +3664,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group 
[[ACC_GRP29]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_7]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(ptr noundef nonnull align 4 
dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3545,23 +3791,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3571,81 +3823,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3658,16 +3929,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 @@ -3678,111 +3944,115 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group 
[[ACC_GRP35]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 
4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_10]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(ptr noundef nonnull 
align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP46]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -3802,25 +4072,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -3831,89 +4108,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -3926,17 +4221,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3947,130 +4237,136 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_13:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] 
// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = 
getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] -// 
CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[CLASS_ANON_13]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP45]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[TMP46]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] // CHECK3-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 +// CHECK3-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP52]]) +// CHECK3-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK3-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK3-NEXT: [[TMP55:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP55]], 0 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK3-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -4089,23 +4385,29 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4115,81 +4417,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: 
.omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -4202,16 +4523,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4222,94 +4538,98 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_16:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: 
[[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: 
[[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 
[[TMP37]] // CHECK3-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP38]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I3]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_16]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: 
omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4317,12 +4637,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK3-NEXT: br i1 [[TMP44]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP45]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -4342,25 +4662,32 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK3-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -4371,89 +4698,107 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -4466,17 +4811,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4487,96 +4827,102 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_19:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, 
ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK3-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK3-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], 
i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[I4]], ptr [[TMP42]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP43]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[CLASS_ANON_19]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP44]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP45]], 1 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -4584,12 +4930,12 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -5240,23 +5586,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5266,83 +5618,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// 
CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP31:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5355,16 +5726,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5377,104 +5743,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to 
i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // 
CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// 
CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: 
[[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5493,23 +5863,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5519,83 +5895,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 
+// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5608,16 +6003,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5630,104 +6020,108 @@ // CHECK9-NEXT: 
[[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = 
trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] 
] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, 
ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double 
[[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -5747,25 +6141,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5775,110 +6176,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -5891,16 +6311,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5913,104 +6328,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] 
= icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD12:%.*]] = 
add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -6029,23 +6448,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6055,83 +6480,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// 
CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6144,16 +6588,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6166,104 +6605,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr 
[[TMP2]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = 
sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -6283,25 +6726,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6312,91 +6762,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 
+// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6409,17 +6877,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6432,125 +6895,131 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: 
omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br 
label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 8, 
!llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, 
!llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], 
align 4 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -6569,23 +7038,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // 
CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6595,83 +7070,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 
[[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// 
CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6684,16 +7178,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6706,87 +7195,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// 
CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP53]] -// 
CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX7]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store double [[ADD8]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -6794,12 +7287,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -6819,25 +7312,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6848,91 +7348,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6945,17 +7463,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 
-// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6968,89 +7481,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// 
CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i64 
[[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX8]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -7058,12 +7577,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -7650,23 +8169,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7676,83 +8201,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP62]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 
+// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -7765,16 +8309,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7787,104 +8326,108 @@ // CHECK9-NEXT: 
[[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: 
[[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 
[[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -7903,23 +8446,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7929,83 +8478,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP68]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 
+// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8018,16 +8586,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8040,104 +8603,108 @@ // CHECK9-NEXT: 
[[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: 
[[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 
[[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8157,25 +8724,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8185,110 +8759,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 
4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP18]], 1 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: -// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] +// CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK9-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK9-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK9-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK9-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -8301,16 +8894,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8323,104 +8911,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: 
[[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle 
i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP77]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 
[[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP77]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8439,23 +9031,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8465,83 +9063,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP80]] 
-// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP80]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP80]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP80]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP80]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP81:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8554,16 +9171,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8576,104 +9188,108 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 
+// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
+// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP20:%.*]] = 
load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM10]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext 
i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP36]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP83]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP83]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP84:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -8693,25 +9309,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -8722,91 +9345,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP86]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP86]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP86]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP86]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -8819,17 +9460,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 
-// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8842,125 +9478,131 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // 
CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP14]] to i32 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[CONV6]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP25]] to i32 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP24]], [[CONV6]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89:![0-9]+]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 [[IDXPROM11]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[IDXPROM14]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP38]] to i64 +// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i64 [[IDXPROM11]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK9-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group 
[[ACC_GRP89]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i64 [[IDXPROM14]] // CHECK9-NEXT: store i32 [[ADD13]], ptr [[ARRAYIDX15]], align 4, !llvm.access.group [[ACC_GRP89]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP89]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: 
[[ADD18:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK9-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK9-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK9-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] @@ -8979,23 +9621,29 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9005,83 +9653,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], 
align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, 
ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92:![0-9]+]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP92]] 
-// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP92]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP92]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP92]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP92]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -9094,16 +9761,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9116,87 +9778,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// 
CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[TMP23]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95:![0-9]+]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95:![0-9]+]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP95]] -// 
CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr 
inbounds i32, ptr [[TMP33]], i64 [[IDXPROM6]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM9]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP95]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] +// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP95]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP96:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9204,12 +9870,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: 
[[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] @@ -9229,25 +9895,32 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_27:%.*]], align 8 // CHECK9-NEXT: store i64 [[CH]], ptr [[CH_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -9258,91 +9931,109 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 8 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_27:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_27]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i64 [[TMP20]], i64 [[TMP22]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP24]]), !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 5 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP98]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP98]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP98]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP98]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: 
[[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK9-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP43]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9355,17 +10046,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 
8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9378,89 +10064,95 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_28:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_28]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101:![0-9]+]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101:![0-9]+]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP101]] 
+// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i64 
[[IDXPROM10]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP37]] to i64 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i64 [[IDXPROM7]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP40]] to i64 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i64 [[IDXPROM10]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP101]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK9-NEXT: store i32 [[ADD12]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP101]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK9: omp.inner.for.end: @@ -9468,12 +10160,12 @@ // CHECK9: omp.dispatch.inc: // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK9-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] @@ -10070,23 +10762,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10096,81 +10794,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr 
[[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 
0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10183,16 +10900,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10205,99 
+10917,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 
+// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 
4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ 
[[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr 
[[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: 
[[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10316,23 +11032,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10342,81 +11064,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP27]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10429,16 +11170,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10451,99 +11187,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP30]] -// 
CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], 
align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10563,25 +11303,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..6, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10591,108 +11338,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, 
!llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -10705,16 +11471,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = 
alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10727,99 +11488,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// 
CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: 
[[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -10838,23 +11603,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..10, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10864,81 +11635,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..11, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10951,16 +11741,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10973,99 +11758,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, 
ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP35]], i32 [[TMP36]] // 
CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: 
[[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -11085,25 +11874,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..14, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11114,89 +11910,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 
4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..15, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP45]] +// 
CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP45]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11209,17 +12023,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 
-// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11232,118 +12041,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: 
omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: 
omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: 
[[TMP28:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load double, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] 
-// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -11362,23 +12177,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..18, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11388,81 +12209,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..19, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP51]] +// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP51]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11475,16 +12315,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11497,82 +12332,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], 
label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] -// 
CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store double [[ADD6]], ptr [[ARRAYIDX7]], align 4, 
!llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11580,12 +12419,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -11605,25 +12444,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..22, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -11634,89 +12480,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..23, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP57]] 
+// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11729,17 +12593,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 
-// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11752,84 +12611,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr 
@[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// 
CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load double, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store double [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP60]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP60]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -11837,12 +12702,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: 
[[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12429,23 +13294,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..26, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12455,81 +13326,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..27, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP63]] +// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP63]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP63]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP63]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP63]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12542,16 +13432,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12564,99 +13449,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 
[[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP66]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP66]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP67:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12675,23 +13564,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..30, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12701,81 +13596,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..31, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP69]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP69]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP69]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP69]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -12788,16 +13702,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12810,99 +13719,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, 
ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP72]] -// 
CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group 
[[ACC_GRP72]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP72]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP72]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -12922,25 +13835,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..34, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12950,108 +13870,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP23]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..35, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]]), !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ] // CHECK11-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP75]] +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP75]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP76:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -13064,16 +14003,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13086,99 +14020,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 
// CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], 
align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP78]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP78]] // CHECK11-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13197,23 +14135,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], 
ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..38, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..38, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..38 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13223,81 +14167,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_19:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_19]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..39, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..39, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP81]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP81]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP81]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP81]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13310,16 +14273,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..39 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13332,99 +14290,103 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, 
ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP84]] -// 
CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group 
[[ACC_GRP84]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP84]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP84]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK11-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] @@ -13444,25 +14406,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..42, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..42, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..42 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13473,89 +14442,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..43, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP87]] 
+// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..43, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP87]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP87]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP87]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP87]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -13568,17 +14555,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..43 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 
-// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13591,118 +14573,124 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// 
CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] 
// CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 [[TMP30]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP39]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP40]], i32 [[TMP41]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP90]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] -// CHECK11-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], 1 // CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP90]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] // CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] // CHECK11-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 -// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP48]]) +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK11-NEXT: br i1 [[TMP50]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP40]], 0 +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK11-NEXT: [[MUL16:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL16]] @@ -13721,23 +14709,29 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..46, ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..46, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..46 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13747,81 +14741,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], 
align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..47, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]), !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP93]] +// 
CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..47, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP93]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP93]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP93]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP93]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13834,16 +14847,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..47 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13856,82 +14864,86 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP22]], i32 35, i32 [[TMP19]], i32 [[TMP20]], i32 1, i32 1) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP24]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], 
label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP96]] -// 
CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 [[TMP28]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP30]], i32 [[TMP31]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP35]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group 
[[ACC_GRP96]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP96]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -13939,12 +14951,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -13964,25 +14976,32 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 4 // CHECK11-NEXT: store i32 [[CH]], ptr [[CH_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..50, ptr [[CH_ADDR]], ptr [[N_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[CH_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..50, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..50 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CH:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[CH_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -13993,89 +15012,107 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[CH]], ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CH_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..51, i32 [[TMP19]], i32 [[TMP20]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i32 [[TMP22]]), !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP99]] 
+// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..51, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP99]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP99]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP99]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP99]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP41]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -14088,17 +15125,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..51 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 
-// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14111,84 +15143,90 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_26:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP14]], i32 35, i32 [[TMP11]], i32 [[TMP12]], i32 1, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_dispatch_init_4(ptr 
@[[GLOB3]], i32 [[TMP25]], i32 35, i32 [[TMP22]], i32 [[TMP23]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP16]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP27]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP30]], [[TMP31]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP32]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 [[TMP29]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// 
CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 [[TMP37]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP38]] +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 [[TMP40]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP102]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], 1 // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP102]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -14196,12 +15234,12 @@ // CHECK11: omp.dispatch.inc: // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: 
[[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP44]], 0 // CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -288,25 +288,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -315,98 +321,105 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 
4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], 
[[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP21]], ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP24]], ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: 
store float [[TMP28]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// 
CHECK1-NEXT: [[TMP32:%.*]] = load volatile double, ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP32]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// 
CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK1-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -415,16 +428,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: 
[[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -433,83 +445,95 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 
+// CHECK1-NEXT: store double [[TMP8]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group 
[[ACC_GRP8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -553,6 +577,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -566,20 +591,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -588,92 +618,102 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 
4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: 
store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 
[[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double [[TMP20]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = load volatile double, ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double [[TMP29]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP31]], ptr 
[[TMP30]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br 
i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -682,16 +722,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], i32 noundef [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -699,86 +737,96 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca 
i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store double [[TMP8]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP12]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP6]], align 8 +// CHECK3-NEXT: store double [[TMP15]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -975,6 +1023,7 @@ // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -984,21 +1033,27 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1007,138 +1062,149 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 
+// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] 
= load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr 
[[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK8-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK8-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store 
i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: store i32 [[TMP21]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP22:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR9]], align 4, !llvm.access.group 
[[ACC_GRP5]] -// CHECK8-NEXT: store i32 [[TMP24]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP25:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[VEC4]], i64 [[TMP22]], ptr [[S_ARR5]], ptr [[TMP23]], i64 [[TMP25]]), !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK8-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK8-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, 
!llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK8-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP28:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK8-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) +// CHECK8-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK8-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done12: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: 
[[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1146,122 +1212,130 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr 
[[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK8-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = 
trunc i64 [[TMP15]] to i32 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], 
ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP29:%.*]] = load 
ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP29]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK8-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK8-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK8-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq 
ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1279,10 +1353,10 @@ // CHECK8-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK8-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK8-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1293,9 +1367,9 @@ // CHECK8-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK8-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK8-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK8-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK8-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK8-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK8-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1359,12 +1433,12 @@ // CHECK8-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK8: omp_offload.cont: // CHECK8-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: // CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1439,6 +1513,7 @@ // CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1447,20 +1522,25 @@ // CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK8-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1469,128 +1549,140 @@ // CHECK8-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 4 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK8-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK8-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK8-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done6: -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = 
icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP14:![0-9]+]] -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP20:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[VEC4]], i64 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]]), !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK8-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK8-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK8-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK8-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK8-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// 
CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK8-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK8-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK8-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done11: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done7: // CHECK8-NEXT: ret void // // // CHECK8-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK8-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK8-NEXT: entry: // CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK8-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK8-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1598,121 +1690,127 @@ // CHECK8-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// 
CHECK8-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK8-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK8-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK8-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK8-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK8-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK8-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK8-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK8-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK8-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK8-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK8-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK8-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK8-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK8-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK8-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK8-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK8-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK8-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK8-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK8-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK8-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK8-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK8-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK8-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], 
label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK8-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK8-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK8: omp.arraycpy.body: -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK8: omp.arraycpy.done5: -// CHECK8-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR6]], ptr align 4 [[TMP6]], i64 4, i1 false) -// CHECK8-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK8-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK8-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK8-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK8: omp.arraycpy.done3: +// CHECK8-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK8-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK8-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK8: cond.true: // CHECK8-NEXT: br label [[COND_END:%.*]] // CHECK8: cond.false: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK8-NEXT: br label [[COND_END]] // CHECK8: cond.end: -// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK8-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK8-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK8-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK8-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK8-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK8-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // 
CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK8-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK8-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: -// CHECK8-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK8-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK8-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK8-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK8-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK8-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK8: .omp.final.then: // CHECK8-NEXT: store i32 2, ptr [[I]], align 4 // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: -// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAY_BEGIN12:%.*]] 
= getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK8-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK8-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK8-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK8: arraydestroy.body: -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK8-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK8-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK8: arraydestroy.done13: +// CHECK8-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK8-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK8: arraydestroy.done10: // CHECK8-NEXT: ret void // // @@ -1732,7 +1830,7 @@ // CHECK8-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: 
[[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: store i32 0, ptr [[F]], align 4 // CHECK8-NEXT: ret void // @@ -1745,7 +1843,7 @@ // CHECK8-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK8-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK8-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK8-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK8-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK8-NEXT: ret void @@ -1913,6 +2011,7 @@ // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1922,21 +2021,27 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1945,136 +2050,147 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK10-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK10-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK10-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: store i32 [[TMP19]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[SVAR9]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: store i32 [[TMP22]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[VEC4]], i32 [[TMP20]], ptr [[S_ARR5]], ptr [[TMP21]], i32 [[TMP23]]), !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// 
CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK10-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK10-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], 
[[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done12: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = 
alloca ptr, align 4 -// CHECK10-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2082,118 +2198,126 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group 
[[ACC_GRP10]] +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP30]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP29]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK10-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK10-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2211,10 +2335,10 @@ // CHECK10-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK10-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK10-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2225,9 +2349,9 @@ // CHECK10-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK10-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK10-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK10-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK10-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2291,12 +2415,12 @@ 
// CHECK10-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK10: omp_offload.cont: // CHECK10-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: // CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2371,6 +2495,7 @@ // CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2379,20 +2504,25 @@ // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 
// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2401,126 +2531,138 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK10-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK10-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = 
phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done6: -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done3: +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: 
omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: store i32 [[TMP17]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[VEC4]], i32 [[TMP18]], ptr [[S_ARR5]], ptr [[TMP19]]), !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK10-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK10-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// 
CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK10-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done7: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK10-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2528,117 +2670,123 @@ // CHECK10-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[VEC2:%.*]] = alloca [2 x 
i32], align 4 -// CHECK10-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK10-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK10-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK10-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK10-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK10-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK10-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK10-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[TMP5]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK10-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK10-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK10-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK10-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK10-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK10: omp.arraycpy.body: -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK10: omp.arraycpy.done4: -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR5]], ptr align 4 [[TMP6]], i32 4, i1 false) -// CHECK10-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK10-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK10-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK10: omp.arraycpy.done2: +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK10-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK10-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// 
CHECK10-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] 
-// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK10-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK10-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK10-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 
[[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK10-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK10-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK10-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK10: .omp.final.then: // CHECK10-NEXT: store i32 2, ptr [[I]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK10-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK10-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK10: arraydestroy.body: -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK10-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK10-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK10: arraydestroy.done11: +// CHECK10-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK10-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK10: arraydestroy.done8: // CHECK10-NEXT: ret void // // @@ -2658,7 +2806,7 @@ // CHECK10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: store i32 0, ptr [[F]], 
align 4 // CHECK10-NEXT: ret void // @@ -2671,7 +2819,7 @@ // CHECK10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK10-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK10-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK10-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK10-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -180,70 +180,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -252,12 +265,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -267,60 +279,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] 
] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -331,75 +347,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -408,12 +437,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -423,43 +451,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -467,17 +499,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -594,70 +626,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -666,12 +711,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -681,43 +725,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -725,17 +773,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -746,75 +794,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -823,12 +884,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -838,43 +898,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -882,17 +946,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -904,88 +968,102 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -994,12 +1072,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -1009,43 +1086,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -1053,17 +1134,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1178,70 +1259,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: 
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1250,12 +1344,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1265,43 +1358,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] 
= trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -1309,17 +1406,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1330,75 +1427,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: 
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1407,12 +1517,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1422,43 +1531,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -1466,17 +1579,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1488,88 +1601,102 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1578,12 +1705,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -1593,43 +1719,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -1637,17 +1767,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1725,70 +1855,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: 
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1797,12 +1940,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1812,60 +1954,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] 
] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1876,75 +2022,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1953,12 +2112,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1968,43 +2126,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -2012,17 +2174,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: 
.omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2139,70 +2301,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2211,12 +2386,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2226,43 +2400,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -2270,17 +2448,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2291,75 +2469,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2368,12 +2559,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2383,43 +2573,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 
8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2427,17 +2621,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2449,17 +2643,20 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -2467,78 +2664,88 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr 
[[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // 
CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, 
!llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group 
[[ACC_GRP35]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP21]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2546,39 +2753,45 @@ // CHECK3: omp_if.else7: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK3: omp.inner.for.cond8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK3: omp.inner.for.body10: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] 
= zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1 -// CHECK3-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK3-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK3-NEXT: store i64 [[TMP29]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP32]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: 
[[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK3: omp_if.else16: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP35]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] // CHECK3: omp_if.end18: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK3: omp.inner.for.inc19: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 
[[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end21: @@ -2586,10 +2799,10 @@ // CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2598,13 +2811,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2614,48 +2826,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: 
call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -2663,60 +2883,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 
[[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
+// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2725,13 +2945,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2741,48 +2960,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], 
align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -2790,60 +3017,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store 
i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: 
omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr 
[[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 
[[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2958,70 +3185,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3030,12 +3270,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3045,43 +3284,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] 
= trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -3089,17 +3332,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3110,75 +3353,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3187,12 +3443,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3202,43 +3457,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi 
i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3246,17 +3505,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3268,88 +3527,102 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[ARG_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// 
CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group 
[[ACC_GRP55]] +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br 
label [[DOTOMP_FINAL_DONE]] @@ -3358,12 +3631,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3373,43 +3645,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br 
label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -3417,17 +3693,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4081,70 +4357,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4153,12 +4442,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4168,60 +4456,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] 
] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4232,75 +4524,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4309,12 +4614,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4324,43 +4628,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -4368,17 +4676,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4495,70 +4803,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4567,12 +4888,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4582,43 +4902,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -4626,17 +4950,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4647,75 +4971,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4724,12 +5061,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4739,43 +5075,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -4783,17 +5123,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4805,88 +5145,102 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4895,12 +5249,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -4910,43 +5263,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -4954,17 +5311,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5079,70 +5436,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: 
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5151,12 +5521,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5166,43 +5535,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] 
= trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -5210,17 +5583,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5231,75 +5604,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: 
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5308,12 +5694,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5323,43 +5708,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -5367,17 +5756,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5389,88 +5778,102 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[ARG_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5479,12 +5882,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -5494,43 +5896,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -5538,17 +5944,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5626,70 +6032,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5698,12 +6117,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5713,60 +6131,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = 
add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5777,75 +6199,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group 
[[ACC_GRP20]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: 
omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5854,12 +6289,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5869,43 +6303,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -5913,17 +6351,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6040,70 +6478,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], 
[ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6112,12 +6563,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6127,43 +6577,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -6171,17 +6625,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// 
CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6192,75 +6646,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// 
CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6269,12 +6736,11 @@ // // // CHECK11-LABEL: 
define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6284,43 +6750,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], 
align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // 
CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6328,17 +6798,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6350,17 +6820,20 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[ARG_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 @@ -6368,78 +6841,88 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], 
[[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 
-// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP20]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP21]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group 
[[ACC_GRP35]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: @@ -6447,39 +6930,45 @@ // CHECK11: omp_if.else7: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] // CHECK11: omp.inner.for.cond8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] // CHECK11: omp.inner.for.body10: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL11:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL11]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[DOTCAPTURE_EXPR__CASTED12]], align 1 -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED12]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP28]] to i1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK11-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +// CHECK11-NEXT: store i64 [[TMP29]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP32]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP34]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) // CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] // CHECK11: omp_if.else16: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP29]], ptr [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP35]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP5]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] // CHECK11: omp_if.end18: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] // CHECK11: omp.inner.for.inc19: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end21: @@ -6487,10 +6976,10 @@ // CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP3]]) -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6499,13 +6988,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6515,48 +7003,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -6564,60 +7060,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6626,13 +7122,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6642,48 +7137,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 
[[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -6691,60 +7194,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: 
call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 
[[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6859,70 +7362,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l59 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6931,12 +7447,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6946,43 +7461,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -6990,17 +7509,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// 
CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7011,75 +7530,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l65 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// 
CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7088,12 +7620,11 @@ // // // 
CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7103,43 +7634,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br 
label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7147,17 +7682,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // 
CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7169,88 +7704,102 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[ARG_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], 
ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP13]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7259,12 +7808,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7274,43 +7822,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] 
-// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -7318,17 +7870,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -233,25 +233,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -260,107 +266,121 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[G3]], ptr [[TMP17]], ptr [[SVAR6]], ptr [[SFVAR7]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -368,112 +388,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = 
alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// 
CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8, 
!llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: 
[[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 8 -// 
CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -515,6 +539,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -528,20 +553,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -550,105 +580,119 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], 
[[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[G3]], ptr [[TMP15]], ptr [[SVAR6]], ptr [[SFVAR7]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr 
[[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr 
noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -656,110 +700,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// 
CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile 
double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR6]], 
align 4 -// CHECK3-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -955,6 +1003,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -964,21 +1013,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -987,33 +1042,38 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 
0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1023,112 +1083,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP18]], ptr [[SVAR8]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: 
[[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done11: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store 
i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP41]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP42]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP43]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done13: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1136,39 +1206,43 @@ // CHECK9-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: 
store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1178,106 +1252,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], 
i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // 
CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, 
ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], 
label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1295,10 +1369,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1309,9 +1383,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull 
align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1375,12 +1449,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1455,6 +1529,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr 
[[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1463,20 +1538,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1485,142 +1565,156 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP17]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = 
icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done6: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP39]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1628,143 +1722,147 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: 
[[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: 
[[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: 
br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: 
[[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = 
getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1784,7 +1882,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1797,7 +1895,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1966,6 +2064,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1975,21 
+2074,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1998,33 +2103,38 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2034,110 +2144,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP16]], ptr [[SVAR8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// 
CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr 
[[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// 
CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP39]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP40]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP41]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2145,37 
+2265,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], 
ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2185,104 +2309,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], 
i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] 
+// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2300,10 +2424,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2314,9 +2438,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2380,12 +2504,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2460,6 +2584,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr 
[[S_ARR_ADDR]], align 4 @@ -2468,20 +2593,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2490,140 +2620,154 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC4]], ptr [[T_VAR3]], ptr [[S_ARR5]], ptr [[TMP15]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK11-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] 
= icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// 
CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done6: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2631,139 +2775,143 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], 
ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// 
CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 
[[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], 
label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] 
-// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2783,7 +2931,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2796,7 +2944,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -236,71 +236,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -309,12 +322,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -324,43 +336,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -370,27 +386,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: unreachable // // @@ -405,77 +421,91 @@ // CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // 
CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call 
void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -484,12 +514,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -499,43 +528,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -545,27 +578,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: unreachable // // @@ -725,71 +758,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -798,12 +844,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -813,43 +858,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -859,98 +908,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -959,12 +1021,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -974,43 +1035,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1020,98 +1085,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1120,12 +1198,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1135,43 +1212,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1181,77 +1262,84 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: 
store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -1260,48 +1348,53 @@ // CHECK1-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK1: invoke.cont2: -// CHECK1-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // 
CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1311,43 +1404,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1357,27 +1454,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } 
[[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: unreachable // // @@ -1870,71 +1967,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], 
[[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1943,12 +2053,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1958,43 +2067,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2004,27 +2117,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: unreachable // // @@ -2039,77 +2152,91 @@ // CHECK5-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // 
CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 
-// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call 
void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2118,12 +2245,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2133,43 +2259,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2179,27 +2309,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: unreachable // // @@ -2350,71 +2480,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2423,12 +2566,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2438,43 +2580,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2484,98 +2630,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2584,12 +2743,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2599,43 +2757,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2645,98 +2807,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2745,12 +2920,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2760,43 +2934,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2806,77 +2984,84 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: 
store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -2885,48 +3070,53 @@ // CHECK5-NEXT: [[CALL:%.*]] = invoke signext i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK5: invoke.cont2: -// CHECK5-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK5-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // 
CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2936,43 +3126,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2982,27 +3176,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } 
[[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: unreachable // // @@ -3165,71 +3359,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3238,12 +3445,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3253,43 +3459,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3299,27 +3509,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: unreachable // // @@ -3334,77 +3544,91 @@ // CHECK9-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // 
CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK9-NEXT: call 
void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3413,12 +3637,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3428,43 +3651,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3474,27 +3701,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: unreachable // // @@ -3654,71 +3881,84 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3727,12 +3967,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3742,43 +3981,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3788,98 +4031,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3888,12 +4144,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3903,43 +4158,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -3949,98 +4208,111 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4049,12 +4321,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4064,43 +4335,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -4110,77 +4385,84 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: 
store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: // CHECK9-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -4189,48 +4471,53 @@ // CHECK9-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK9: invoke.cont2: -// CHECK9-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } // 
CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: unreachable // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4240,43 +4527,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: invoke void @_Z3foov() @@ -4286,27 +4577,27 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void // CHECK9: terminate.lpad: -// CHECK9-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK9-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK9-NEXT: catch ptr null -// CHECK9-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } 
[[TMP13]], 0 -// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK9-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: unreachable // // @@ -4799,71 +5090,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK13-SAME: () #[[ATTR3:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] 
= alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 
[ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4872,12 +5176,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 
[[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4887,43 +5190,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -4933,27 +5240,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: unreachable // // @@ -4968,77 +5275,91 @@ // CHECK13-SAME: (i64 [[A:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // 
CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[A_ADDR]]) +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP9:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP9]]), !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP2]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5047,12 +5368,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 
[[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5062,43 +5382,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5108,27 +5432,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw 
i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: unreachable // // @@ -5279,71 +5603,84 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l52 // 
CHECK13-SAME: () #[[ATTR3]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], 
ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5352,12 +5689,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 
[[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5367,43 +5703,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5413,98 +5753,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw 
i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP27]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // 
CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5513,12 +5866,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 
[[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5528,43 +5880,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5574,98 +5930,111 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw 
i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP33]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l52 // CHECK13-SAME: () #[[ATTR3]] { // 
CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5674,12 +6043,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 
[[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5689,43 +6057,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5735,77 +6107,84 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw 
i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP39]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l57 // CHECK13-SAME: () #[[ATTR3]] { // 
CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: // CHECK13-NEXT: invoke void @_ZN1SC1El(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 23) @@ -5814,48 +6193,53 @@ // CHECK13-NEXT: [[CALL:%.*]] = invoke i8 @_ZN1ScvcEv(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK13-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[TERMINATE_LPAD]], !llvm.access.group [[ACC_GRP42]] // CHECK13: invoke.cont2: -// CHECK13-NEXT: [[TMP7:%.*]] = sext i8 [[CALL]] to i32 -// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP7]]), !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP8:%.*]] = sext i8 [[CALL]] to i32 +// CHECK13-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]], !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to 
i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK13-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK13-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK13-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } +// CHECK13-NEXT: [[TMP19:%.*]] = 
landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] +// CHECK13-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP42]] // CHECK13-NEXT: unreachable // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK13-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5865,43 +6249,47 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 
+// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: invoke void @_Z3foov() @@ -5911,27 +6299,27 @@ // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK13-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: ret void // CHECK13: terminate.lpad: -// CHECK13-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// 
CHECK13-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK13-NEXT: catch ptr null -// CHECK13-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] +// CHECK13-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK13-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR10]], !llvm.access.group [[ACC_GRP45]] // CHECK13-NEXT: unreachable // // diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp @@ -151,15 +151,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -167,62 +169,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -231,12 +244,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -250,80 +262,84 @@ // CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, 
!llvm.access.group !8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
[[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -354,15 +370,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -370,60 +388,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -432,12 +461,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -451,78 +479,82 @@ // CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr 
[[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, 
!llvm.access.group !9 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP14]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP15]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], 
align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -588,27 +620,27 @@ // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 
8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -616,18 +648,18 @@ // CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load 
i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -656,15 +688,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -672,6 +706,8 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -679,8 +715,11 @@ 
// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -698,62 +737,68 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // 
CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -763,12 +808,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -785,15 +829,19 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store 
i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -810,72 +858,72 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], 
i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !9 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] @@ -898,62 +946,62 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1016,15 +1064,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1032,88 +1082,99 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !14 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !14 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// 
CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1122,12 +1183,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1137,104 +1197,108 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = 
getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: 
store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1258,7 +1322,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1271,7 +1335,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1318,27 +1382,27 @@ // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr 
[[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95() #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1346,18 +1410,18 
@@ // CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1386,15 +1450,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l95 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1402,6 +1468,8 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1409,8 +1477,11 @@ // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 @@ -1428,60 +1499,66 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] @@ -1491,12 +1568,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1513,15 +1589,19 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1536,70 +1616,70 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, 
!llvm.access.group !10 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], 
[[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1622,62 +1702,62 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call 
void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: 
omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // 
// CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1740,15 +1820,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1756,86 +1838,97 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label 
[[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1844,12 +1937,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] { 
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1859,100 +1951,104 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: 
br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], 
i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1976,7 +2072,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1989,7 +2085,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void @@ -2047,8 +2143,8 @@ // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 // CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2063,43 +2159,43 @@ // CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP6]] to i64 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM7]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group !2 +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP6]], i64 4, i1 false), !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] @@ -2108,18 +2204,18 @@ // CHECK13-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK13-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN12]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY13:%.*]] // CHECK13: arraydestroy.body13: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], [[ARRAYDESTROY_BODY13]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], [[ARRAYDESTROY_BODY13]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT15]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST14]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT15]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE16:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT15]], [[ARRAY_BEGIN12]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE16]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY13]] // CHECK13: arraydestroy.done17: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK13-NEXT: ret i32 [[TMP14]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK13-NEXT: ret i32 [[TMP11]] // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -2186,8 +2282,8 @@ // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 // CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // 
CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2202,43 +2298,43 @@ // CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 // 
CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM7]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group !6 +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP6]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 2, ptr 
[[I]], align 4 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] @@ -2246,18 +2342,18 @@ // CHECK13: arraydestroy.done11: // CHECK13-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY13:%.*]] // CHECK13: arraydestroy.body13: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], [[ARRAYDESTROY_BODY13]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], 
[[ARRAYDESTROY_BODY13]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT15]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST14]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT15]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE16:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT15]], [[ARRAY_BEGIN12]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE16]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY13]] // CHECK13: arraydestroy.done17: // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK13-NEXT: ret i32 [[TMP14]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK13-NEXT: ret i32 [[TMP11]] // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -2397,8 +2493,8 @@ // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 // CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2413,41 +2509,41 @@ // CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 -// 
CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP8]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP7]], i32 4, i1 false), !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP3]] +// 
CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP5]] +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP6]], i32 4, i1 false), !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // 
CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] @@ -2456,18 +2552,18 @@ // CHECK15-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK15-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY12:%.*]] // CHECK15: arraydestroy.body12: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT14]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST13]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT14]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE15:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT14]], [[ARRAY_BEGIN11]] // CHECK15-NEXT: br i1 
[[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY12]] // CHECK15: arraydestroy.done16: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK15-NEXT: ret i32 [[TMP14]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK15-NEXT: ret i32 [[TMP11]] // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -2534,8 +2630,8 @@ // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 // CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2550,41 +2646,41 @@ // CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP8]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP7]], i32 4, i1 false), !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP5]] +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP6]], i32 4, i1 false), !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] @@ -2592,18 +2688,18 @@ // CHECK15: arraydestroy.done10: // CHECK15-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY12:%.*]] // CHECK15: arraydestroy.body12: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT14]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST13]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT14]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE15:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT14]], [[ARRAY_BEGIN11]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY12]] // CHECK15: arraydestroy.done16: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK15-NEXT: ret i32 [[TMP14]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK15-NEXT: ret i32 [[TMP11]] // // 
// CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -119,71 +119,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -192,12 +205,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -207,60 +219,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -271,71 +287,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// 
CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -344,12 +373,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -359,60 +387,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -456,71 +488,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// 
CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -529,12 +574,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -544,60 +588,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -633,22 +681,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP2:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 @@ -658,22 +706,22 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV5]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group 
[[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK3: omp.body.continue12: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK3: omp.inner.for.inc13: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end15: // CHECK3-NEXT: store i32 1000, ptr [[I6]], align 4 @@ -695,22 +743,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp --- a/clang/test/OpenMP/distribute_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_private_codegen.cpp @@ -130,15 +130,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -155,66 +157,68 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr 
[[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -241,15 +245,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -266,66 +272,68 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -363,53 +371,53 @@ // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// 
CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP13]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 
0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr 
[[TMP19]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] // CHECK9: omp_offload.failed4: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT5]] @@ -417,18 +425,18 @@ // CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP22]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done6: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP24]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP23]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -457,15 +465,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -482,6 +492,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -499,65 +511,65 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -579,15 +591,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: 
entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -598,49 +612,51 @@ // CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: 
br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // @@ -648,62 +664,62 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: call void 
@_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], 
align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -766,15 +782,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -784,89 +802,91 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -890,7 +910,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -903,7 +923,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -952,53 +972,53 @@ // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// 
CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK11-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP20]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] // CHECK11: omp_offload.failed4: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT5]] @@ -1006,18 +1026,18 @@ // CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], 
[[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done6: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP24]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP23]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1046,15 +1066,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1071,6 +1093,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1088,63 +1112,63 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1166,15 +1190,17 @@ // CHECK11-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1185,49 +1211,51 @@ // CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void // // @@ -1235,62 +1263,62 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca 
[2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// 
CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// 
CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1353,15 +1381,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1371,87 +1401,89 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: 
[[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1475,7 +1507,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1488,7 +1520,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -165,52 +165,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// 
CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -225,23 +225,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -251,84 +257,86 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK1-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] -// CHECK1-NEXT: store float 
[[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 
[[IDXPROM8]] +// CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -357,52 +365,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr 
[[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -417,23 +425,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -443,82 +457,84 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// 
CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 32, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -547,52 +563,52 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 16908289, ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK1-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 16908289, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -607,23 +623,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -633,99 +655,101 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, 
!llvm.access.group !17 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// 
CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP17]] +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -745,7 +769,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i8 0, ptr [[A]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[I]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], ptr [[I_CASTED]], align 1 @@ -755,53 +779,53 @@ // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[A]], 
align 1 -// CHECK1-NEXT: store i8 [[TMP16]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV3]] -// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[A]], align 1 +// CHECK1-NEXT: store i8 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// 
CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[ADD6]] to i64 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[ADD4]] to i64 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP20]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP21]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115(i64 [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -814,119 +838,125 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = 
load i8, ptr [[TMP1]], align 1 -// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK1-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK1-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK1-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: 
br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !20 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], 
[[MUL]] -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK1-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK1-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK1-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK1-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK1-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK1-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK1-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK1-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK1-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK1-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -956,34 +986,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// 
CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135(i64 [[TMP1]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -995,17 +1025,20 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // 
CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1015,73 +1048,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 
[[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1117,52 +1152,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 
-// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// 
CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1177,23 +1212,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1203,80 +1244,82 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK3-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] +// CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1305,52 +1348,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// 
CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1365,23 +1408,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 
4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1391,78 +1440,80 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// 
CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK3-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 32, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1491,52 +1542,52 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 16908289, ptr [[TMP34]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK3-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 16908289, ptr [[TMP26]], align 8 +// 
CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1551,23 +1602,29 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1577,95 +1634,97 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: 
[[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: 
[[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] +// CHECK3-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// 
CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1685,7 +1744,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i8 0, 
ptr [[A]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = load i8, ptr [[I]], align 1 // CHECK3-NEXT: store i8 [[TMP0]], ptr [[I_CASTED]], align 1 @@ -1695,53 +1754,53 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[A]], align 1 -// CHECK3-NEXT: store i8 [[TMP16]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV3]] -// CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr 
[[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[A]], align 1 +// CHECK3-NEXT: store i8 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[TMP19:%.*]] = zext i32 [[ADD6]] to i64 +// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[ADD4]] to i64 // 
CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 [[TMP19]], ptr [[TMP28]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne 
i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP24]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) +// 
CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115(i32 [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1754,119 +1813,125 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = 
load i8, ptr [[TMP1]], align 1 -// CHECK3-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK3-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK3-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK3-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: 
br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], 
[[MUL]] -// CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK3-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK3-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
+// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK3-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK3-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK3-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK3-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK3-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK3-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK3-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK3-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK3-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK3-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1896,34 +1961,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 
-// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135(i32 [[TMP1]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1935,17 +2000,20 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], 
align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1955,73 +2023,75 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // 
CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2057,52 +2127,52 @@ // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK5-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK5-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK5-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK5-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK5-NEXT: store ptr null, ptr [[TMP9]], align 8 +// 
CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK5-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK5-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK5-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr 
[[TMP25]], ptr [[TMP29]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK5-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK5-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK5-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK5-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK5-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2117,23 +2187,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // 
CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2143,84 +2219,86 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 16) ] +// CHECK5-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] -// CHECK5-NEXT: store float 
[[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 
[[IDXPROM8]] +// CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2249,52 +2327,52 @@ // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK5-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK5-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK5-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 
+// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK5-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK5-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK5-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK5-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK5-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK5-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK5-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr 
[[KERNEL_ARGS]]) -// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK5-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK5-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK5-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2309,23 +2387,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2335,82 +2419,84 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !nontemporal !15 -// CHECK5-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !nontemporal !15 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// 
CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !nontemporal !15 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK5-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 32, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2439,52 +2525,52 @@ // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK5-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK5-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK5-NEXT: store ptr null, ptr [[TMP18]], 
align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK5-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK5-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK5-NEXT: 
store ptr null, ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK5-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK5-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK5-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK5-NEXT: store i64 16908289, ptr [[TMP34]], align 8 -// CHECK5-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) -// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK5-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 
8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK5-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK5-NEXT: store i64 16908289, ptr [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2499,23 +2585,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2525,99 +2617,101 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr 
[[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group 
[[ACC_GRP18]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK5-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2637,7 +2731,7 @@ // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i8 0, ptr [[A]], align 1 // CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[I]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], ptr [[I_CASTED]], align 1 @@ -2647,53 +2741,53 @@ // CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK5-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK5-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP16:%.*]] = load i8, 
ptr [[A]], align 1 -// CHECK5-NEXT: store i8 [[TMP16]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK5-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV3]] -// CHECK5-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK5-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK5-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[A]], align 1 +// CHECK5-NEXT: store i8 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK5-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK5-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], 
align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[TMP19:%.*]] = zext i32 [[ADD6]] to i64 +// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[ADD4]] to i64 // CHECK5-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 1, ptr [[TMP20]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK5-NEXT: store i32 2, ptr [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP14]], ptr [[TMP22]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP15]], ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr @.offload_sizes.8, ptr [[TMP24]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP25]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr null, ptr [[TMP26]], align 8 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK5-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK5-NEXT: store i64 [[TMP19]], ptr [[TMP28]], align 8 -// CHECK5-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) -// CHECK5-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK5-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK5-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK5-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP11]], ptr [[TMP19]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @.offload_sizes.8, ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK5-NEXT: 
[[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP24]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) +// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK5-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115(i64 [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2706,150 +2800,156 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = 
load i8, ptr [[TMP1]], align 1 -// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK5-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK5-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK5-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK5-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: 
br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK5-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK5-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK5-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !15, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK5-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK5-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !15, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK5: 
omp.inner.for.cond13: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK5: omp.inner.for.body15: -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: // CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK5: omp.body.continue20: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK5: omp.inner.for.inc21: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP24:![0-9]+]] -// CHECK5: omp.inner.for.end23: +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// 
CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK5: omp.body.continue19: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK5: omp.inner.for.inc20: +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK5: omp.inner.for.end22: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK5-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 10, 
[[CONV25]] -// CHECK5-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK5-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK5-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK5-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK5-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK5-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK5-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK5-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK5-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK5-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK5-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK5-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK5-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK5-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK5-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK5-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] @@ -2879,34 +2979,34 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK5-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK5-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK5-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr null, ptr [[TMP15]], align 8 
-// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK5-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK5-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK5-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK5-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK5-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK5-NEXT: store ptr null, ptr 
[[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK5-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK5-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK5: omp_offload.failed: // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135(i64 [[TMP1]]) #[[ATTR3]] // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -2918,17 +3018,20 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2938,73 +3041,75 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK5-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3040,52 +3145,52 @@ // CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr null, ptr [[TMP18]], 
align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK7-NEXT: 
store ptr null, ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK7-NEXT: store ptr @.offload_sizes, ptr [[TMP30]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK7-NEXT: store ptr @.offload_maptypes, ptr [[TMP31]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK7-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK7-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK7-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK7-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK7-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK7-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK7-NEXT: store ptr @.offload_maptypes, ptr 
[[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK7-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK7-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK7-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK7-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -3100,23 +3205,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3126,80 +3237,82 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 16) ] +// CHECK7-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK7-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] -// CHECK7-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK7-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] +// CHECK7-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3228,52 +3341,52 @@ // CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK7-NEXT: store ptr @.offload_sizes.2, ptr [[TMP30]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK7-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP31]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK7-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK7-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK7-NEXT: store i64 4571424, ptr [[TMP34]], align 8 -// CHECK7-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK7-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// 
CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK7-NEXT: store ptr @.offload_sizes.2, ptr [[TMP22]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK7-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK7-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK7-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK7-NEXT: store i64 4571424, ptr [[TMP26]], align 8 +// CHECK7-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -3288,23 +3401,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 
4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3314,78 +3433,80 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4, !nontemporal !16 -// CHECK7-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK7-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4, !nontemporal !16 -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK7-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK7-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK7-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4, !nontemporal !16 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK7-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK7-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 32, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3414,52 +3535,52 @@ // CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP21]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr null, ptr [[TMP23]], align 
4 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr 
null, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 4, ptr [[TMP27]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK7-NEXT: store ptr @.offload_sizes.5, ptr [[TMP30]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK7-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP31]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK7-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK7-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK7-NEXT: store i64 16908289, ptr [[TMP34]], align 
8 -// CHECK7-NEXT: [[TMP35:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) -// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK7-NEXT: br i1 [[TMP36]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK7-NEXT: store ptr @.offload_sizes.5, ptr [[TMP22]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK7-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK7-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK7-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK7-NEXT: store 
i64 16908289, ptr [[TMP26]], align 8 +// CHECK7-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.region_id, ptr [[KERNEL_ARGS]]) +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -3474,23 +3595,29 @@ // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3500,95 +3627,97 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK7-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK7-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: 
[[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK7-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK7-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK7-NEXT: 
[[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK7-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] +// CHECK7-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// 
CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3608,7 +3737,7 @@ // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i8 0, 
ptr [[A]], align 1 // CHECK7-NEXT: [[TMP0:%.*]] = load i8, ptr [[I]], align 1 // CHECK7-NEXT: store i8 [[TMP0]], ptr [[I_CASTED]], align 1 @@ -3618,53 +3747,53 @@ // CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP11]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK7-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP16:%.*]] = load i8, ptr [[A]], align 1 -// CHECK7-NEXT: store i8 [[TMP16]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV3:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK7-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV3]] -// CHECK7-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1 -// CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], 1 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 [[TMP1]], ptr 
[[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr null, ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK7-NEXT: store ptr null, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[A]], align 1 +// CHECK7-NEXT: store i8 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK7-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] +// CHECK7-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 +// CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 -// CHECK7-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK7-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: [[TMP19:%.*]] = zext i32 [[ADD6]] to i64 +// CHECK7-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK7-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = zext i32 [[ADD4]] to i64 // 
CHECK7-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 1, ptr [[TMP20]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 2, ptr [[TMP21]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP14]], ptr [[TMP22]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP15]], ptr [[TMP23]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK7-NEXT: store ptr @.offload_sizes.8, ptr [[TMP24]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK7-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP25]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK7-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK7-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK7-NEXT: store i64 [[TMP19]], ptr [[TMP28]], align 8 -// CHECK7-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) -// CHECK7-NEXT: [[TMP30:%.*]] = icmp ne 
i32 [[TMP29]], 0 -// CHECK7-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 2, ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP10]], ptr [[TMP18]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP11]], ptr [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK7-NEXT: store ptr @.offload_sizes.8, ptr [[TMP20]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK7-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK7-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK7-NEXT: store ptr null, ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK7-NEXT: store i64 [[TMP15]], ptr [[TMP24]], align 8 +// CHECK7-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.region_id, ptr [[KERNEL_ARGS]]) +// 
CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK7-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115(i32 [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -3677,150 +3806,156 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..7, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = 
load i8, ptr [[TMP1]], align 1 -// CHECK7-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK7-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK7-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK7-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK7-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK7-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK7-NEXT: 
br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// 
CHECK7-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK7-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK7-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK7-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK7-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK7-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !16, !llvm.access.group [[ACC_GRP22]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK7-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK7: 
omp.inner.for.cond13: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK7-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK7: omp.inner.for.body15: -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: // CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK7-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK7: omp.body.continue20: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK7: omp.inner.for.inc21: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK7: omp.inner.for.end23: +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// 
CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK7: omp.body.continue19: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK7: omp.inner.for.inc20: +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK7: omp.inner.for.end22: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK7-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK7-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK7-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 10, 
[[CONV25]] -// CHECK7-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK7-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK7-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK7-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK7-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK7-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK7-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK7-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK7-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK7-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK7-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK7-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK7-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK7-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK7-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK7-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: br label [[OMP_PRECOND_END]] @@ -3850,34 +3985,34 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK7-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK7-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK7-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK7-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK7-NEXT: store ptr @.offload_sizes.11, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK7-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP14]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK7-NEXT: store ptr null, ptr [[TMP15]], align 4 
-// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK7-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK7-NEXT: store i64 100, ptr [[TMP17]], align 8 -// CHECK7-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) -// CHECK7-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK7-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK7-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK7-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK7-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK7-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK7-NEXT: store ptr null, ptr 
[[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK7-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK7-NEXT: store i64 100, ptr [[TMP15]], align 8 +// CHECK7-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.region_id, ptr [[KERNEL_ARGS]]) +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK7: omp_offload.failed: // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135(i32 [[TMP1]]) #[[ATTR3]] // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -3889,17 +4024,20 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[AA_ADDR]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3909,73 +4047,75 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK7-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 100, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4014,44 +4154,44 @@ // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 16) ] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP9]] to i64 // CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM1]] -// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[MUL3:%.*]] = fmul float [[TMP7]], [[TMP10]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP13]] -// CHECK9-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group !2 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM7]] -// CHECK9-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 32000001, ptr [[I]], align 4 @@ -4146,44 +4286,44 @@ // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP3]], 127 // CHECK9-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]] -// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, 
!llvm.access.group !9 +// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]] -// CHECK9-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]] -// CHECK9-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 -2147483522, ptr [[I]], align 4 @@ -4232,25 +4372,25 @@ // CHECK9-NEXT: store i8 [[TMP6]], ptr [[DOTLINEAR_START]], align 1 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: [[CONV9:%.*]] = sext i8 [[TMP9]] to i32 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK9-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK9-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group !12 +// CHECK9-NEXT: store i8 [[CONV11]], ptr [[I6]], 
align 1, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 @@ -4293,22 +4433,22 @@ // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK9-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 @@ -4339,40 +4479,40 @@ // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i32 16) ] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// 
CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 [[TMP6]] -// CHECK11-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 [[TMP9]] -// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[MUL2:%.*]] = fmul float [[TMP7]], [[TMP10]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 [[TMP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load float, ptr 
[[ARRAYIDX3]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group !3 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK11-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: store i32 32000001, ptr [[I]], align 4 @@ -4463,40 +4603,40 @@ // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group !10 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP3]], 127 // CHECK11-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]] -// CHECK11-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr 
inbounds float, ptr [[TMP7]], i32 [[TMP8]] -// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]] -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK11-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: [[ADD6:%.*]] = add i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: store i32 -2147483522, ptr [[I]], align 4 @@ -4545,25 +4685,25 @@ // CHECK11-NEXT: store i8 [[TMP6]], ptr [[DOTLINEAR_START]], align 1 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: [[CONV9:%.*]] = sext i8 [[TMP9]] to i32 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK11-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK11-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, 
!llvm.access.group !13 +// CHECK11-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 @@ -4606,22 +4746,22 @@ // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK11-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 @@ -4652,44 +4792,44 @@ // CHECK13-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 16) ] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 // CHECK13-NEXT: 
[[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM1:%.*]] = sext i32 [[TMP9]] to i64 // CHECK13-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 [[IDXPROM1]] -// CHECK13-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[MUL3:%.*]] = fmul float [[TMP7]], [[TMP10]] -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group 
[[ACC_GRP2]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM4]] -// CHECK13-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP13]] -// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM7]] -// CHECK13-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store 
i32 32000001, ptr [[I]], align 4 @@ -4784,44 +4924,44 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[MUL:%.*]] = mul i32 [[TMP3]], 127 // CHECK13-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP5]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group !10 -// 
CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 [[IDXPROM1]] -// CHECK13-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[MUL3:%.*]] = fmul float [[TMP6]], [[TMP9]] -// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM4]] -// CHECK13-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[MUL6:%.*]] = fmul float [[MUL3]], [[TMP12]] -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // 
CHECK13-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP14]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM7]] -// CHECK13-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: [[ADD9:%.*]] = add i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 -2147483522, ptr [[I]], align 4 @@ -4874,25 +5014,25 @@ // CHECK13: omp_if.then: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP10:%.*]] = load 
i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: [[CONV9:%.*]] = sext i8 [[TMP10]] to i32 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK13-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK13-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !7, !llvm.access.group !13 +// CHECK13-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !7, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_IF_END:%.*]] @@ -4962,22 +5102,22 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, ptr [[I]], align 4 @@ -5008,40 +5148,40 @@ // CHECK15-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i32 16) ] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] 
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 7 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 [[TMP9]] -// CHECK15-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group !3 +// 
CHECK15-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[MUL2:%.*]] = fmul float [[TMP7]], [[TMP10]] -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 [[TMP12]] -// CHECK15-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP13]] -// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] -// CHECK15-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: 
store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 32000001, ptr [[I]], align 4 @@ -5132,40 +5272,40 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[MUL:%.*]] = mul i32 [[TMP3]], 127 // CHECK15-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds float, ptr [[TMP4]], i32 [[TMP5]] -// CHECK15-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 [[TMP8]] -// CHECK15-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX1]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[MUL2:%.*]] = fmul float [[TMP6]], [[TMP9]] -// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 [[TMP11]] -// CHECK15-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[MUL4:%.*]] = fmul float [[MUL2]], [[TMP12]] -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK15-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: store float [[MUL4]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: [[ADD6:%.*]] = add i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 -2147483522, ptr [[I]], align 4 @@ -5218,25 +5358,25 @@ // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, 
!llvm.access.group !14 +// CHECK15-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: [[CONV9:%.*]] = sext i8 [[TMP10]] to i32 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK15-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK15-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !8, !llvm.access.group !14 +// CHECK15-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !8, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -5306,22 +5446,22 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, ptr [[I]], align 4 @@ -5335,23 +5475,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr 
[[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5361,84 +5507,86 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 
16) ] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// 
CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = 
getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] -// CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK17-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 
[[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] +// CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK17-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5453,23 +5601,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: 
[[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5479,82 +5633,84 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK17-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK17-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK17-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 
[[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK17-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 32, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5569,23 +5725,29 @@ // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5595,99 +5757,101 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 
16908288 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// 
CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK17-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: 
[[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK17-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK17-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK17-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// 
CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5700,119 +5864,125 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: 
[[I6:%.*]] = alloca i8, align 1 +// CHECK17-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK17-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK17-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK17-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK17-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// 
CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 -// CHECK17-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK17-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK17-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK17-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK17-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK17-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] 
// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK17-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK17-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK17-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK17-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV13]], [[MUL19]] -// CHECK17-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK17-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: 
[[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK17-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK17-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK17-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK17-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK17-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK17-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK17-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK17-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: br label [[OMP_PRECOND_END]] @@ -5824,17 +5994,20 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5844,73 +6017,75 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] 
to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp 
ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 100, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5925,23 +6100,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5951,80 +6132,82 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 
16) ] +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// 
CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] -// CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// 
CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] +// CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6039,23 +6222,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6065,78 +6254,80 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK19-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK19-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK19-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK19-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 32, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6151,23 +6342,29 @@ // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6177,95 +6374,97 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: 
[[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] 
-// CHECK19-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK19-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK19-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] +// CHECK19-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: 
omp.dispatch.inc: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK19-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6278,119 +6477,125 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// 
CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I:%.*]] = alloca i8, align 1 // 
CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK19-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK19-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK19-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK19-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK19-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: 
[[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 
[[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK19-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK19-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK19-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CONV8:%.*]] = sext i8 [[TMP19]] to i32 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK19-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK19-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK19-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK19-NEXT: [[CONV14:%.*]] = sext i8 [[TMP24]] to i32 -// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 10, [[CONV14]] -// CHECK19-NEXT: [[SUB16:%.*]] = sub i32 [[SUB15]], 1 -// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[SUB16]], 1 -// CHECK19-NEXT: [[DIV18:%.*]] = udiv i32 [[ADD17]], 1 -// CHECK19-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 
[[CONV13]], [[MUL19]] -// CHECK19-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK19-NEXT: store i8 [[CONV21]], ptr [[TMP0]], align 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV12:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK19-NEXT: [[TMP27:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK19-NEXT: [[SUB14:%.*]] = sub i32 10, [[CONV13]] +// CHECK19-NEXT: [[SUB15:%.*]] = sub i32 [[SUB14]], 1 +// CHECK19-NEXT: [[ADD16:%.*]] = add i32 [[SUB15]], 1 +// CHECK19-NEXT: [[DIV17:%.*]] = udiv i32 [[ADD16]], 1 +// CHECK19-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV12]], [[MUL18]] +// CHECK19-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK19-NEXT: store i8 [[CONV20]], ptr [[TMP2]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: br label [[OMP_PRECOND_END]] @@ -6402,17 +6607,20 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6422,73 +6630,75 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] 
to i32 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp 
ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 100, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6503,23 +6713,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6529,84 +6745,86 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i64 
16) ] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i64 16) ] // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// 
CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !9 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = 
getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM8]] -// CHECK21-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK21-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP26]] +// CHECK21-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 
[[TMP28]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM8]] +// CHECK21-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK21-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6621,23 +6839,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: 
[[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6647,82 +6871,84 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK21-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK21-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK21-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !nontemporal !16 -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK21-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK21-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK21-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK21-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK21-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK21-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: 
[[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK21-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 32, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6737,23 +6963,29 @@ // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6763,99 +6995,101 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: 
[[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK21-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK21-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group !19 -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] -// CHECK21-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK21-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK21-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK21-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK21-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] +// CHECK21-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// 
CHECK21-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 +// CHECK21-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK21-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK21-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: 
[[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK21-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6868,150 +7102,156 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK21-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// 
CHECK21-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK21-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK21-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK21-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK21-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK21-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK21-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 
10 // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: omp.precond.then: // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK21-NEXT: br i1 [[CMP7]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 -// CHECK21-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK21-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK21-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 +// CHECK21-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK21-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK21-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !16, !llvm.access.group [[ACC_GRP22]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK21-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: 
br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK21: omp.inner.for.cond13: -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK21-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK21: omp.inner.for.body15: -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: // CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK21: omp.body.continue20: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK21: omp.inner.for.inc21: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] -// CHECK21: omp.inner.for.end23: +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK21: omp.body.continue19: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK21: omp.inner.for.inc20: +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK21: omp.inner.for.end22: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK21-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 
-// CHECK21-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK21-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK21-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK21-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK21-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK21-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK21-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK21-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK21-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK21-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK21-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK21-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK21-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK21-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK21-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], [[MUL29]] +// CHECK21-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK21-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: // CHECK21-NEXT: br label [[OMP_PRECOND_END]] @@ -7023,17 +7263,20 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7043,73 +7286,75 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK21-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // 
CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK21-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // 
CHECK21-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK21-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 100, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7124,23 +7369,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7150,80 +7401,82 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP4]], i32 
16) ] +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP9]], i32 16) ] // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 4571423 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 +// 
CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 7 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 [[TMP14]] -// CHECK23-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i32 [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] -// CHECK23-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// 
CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] +// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i32 [[TMP28]] +// CHECK23-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK23-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK23-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32000001, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7238,23 +7491,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7264,78 +7523,80 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK23-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 4, !nontemporal !17 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 [[TMP13]] -// CHECK23-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 [[TMP16]] -// CHECK23-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 -// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i32 [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 4, 
!nontemporal !17 -// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i32 [[TMP22]] +// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] +// CHECK23-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] +// CHECK23-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +// CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP25]] +// CHECK23-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 [[TMP27]] // CHECK23-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK23-NEXT: store i32 [[ADD]], ptr 
[[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK23-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 32, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7350,23 +7611,29 @@ // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7376,95 +7643,97 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: 
[[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK23-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 [[TMP15]] 
-// CHECK23-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i32 [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i32 [[TMP24]] -// CHECK23-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i32 [[TMP20]] +// CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 [[TMP23]] +// CHECK23-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 [[TMP26]] +// CHECK23-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP27]] +// CHECK23-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i32 [[TMP29]] +// CHECK23-NEXT: store float [[MUL6]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK23-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP30]], 1 +// CHECK23-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: 
omp.dispatch.inc: -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK23-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK23-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK23-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 -2147483522, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7477,150 +7746,156 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// 
CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..3, ptr [[I_ADDR]], ptr [[A_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[I:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[A:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I4:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I:%.*]] = alloca i8, align 1 // 
CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[I6:%.*]] = alloca i8, align 1 +// CHECK23-NEXT: [[I5:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK23-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK23-NEXT: store i8 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 // CHECK23-NEXT: [[SUB:%.*]] = sub i32 10, [[CONV]] // CHECK23-NEXT: [[SUB2:%.*]] = sub i32 [[SUB]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB2]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK23-NEXT: 
[[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK23-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: store i8 [[TMP4]], ptr [[I4]], align 1 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV5:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV5]], 10 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV4]], 10 // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: omp.precond.then: // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// 
CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP1]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP4]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // 
CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK23-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !23 -// CHECK23-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] -// CHECK23-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK23-NEXT: store i8 [[CONV11]], ptr [[I6]], align 1, !nontemporal !17, !llvm.access.group !23 +// CHECK23-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[CONV8:%.*]] = sext i8 [[TMP20]] to i32 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[MUL]] +// CHECK23-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i8 +// CHECK23-NEXT: store i8 [[CONV10]], ptr [[I5]], align 1, !nontemporal !17, !llvm.access.group [[ACC_GRP23]] // CHECK23-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK23-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK23: omp.inner.for.cond13: -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP14:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK23-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END23:%.*]] -// CHECK23: omp.inner.for.body15: -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: // CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV16]], [[MUL17]] -// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK23-NEXT: store i8 [[CONV19]], ptr [[I6]], align 1 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE20:%.*]] -// CHECK23: omp.body.continue20: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC21:%.*]] -// CHECK23: omp.inner.for.inc21: -// 
CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] -// CHECK23: omp.inner.for.end23: +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END22:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL16:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV15]], [[MUL16]] +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], ptr [[I5]], align 1 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE19:%.*]] +// CHECK23: omp.body.continue19: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC20:%.*]] +// CHECK23: omp.inner.for.inc20: +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK23: omp.inner.for.end22: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: 
[[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK23-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP29]] to i32 -// CHECK23-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[CONV25:%.*]] = sext i8 [[TMP30]] to i32 -// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 10, [[CONV25]] -// CHECK23-NEXT: [[SUB27:%.*]] = sub i32 [[SUB26]], 1 -// CHECK23-NEXT: [[ADD28:%.*]] = add i32 [[SUB27]], 1 -// CHECK23-NEXT: [[DIV29:%.*]] = udiv i32 [[ADD28]], 1 -// CHECK23-NEXT: [[MUL30:%.*]] = mul nsw i32 [[DIV29]], 1 -// CHECK23-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV24]], [[MUL30]] -// CHECK23-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK23-NEXT: store i8 [[CONV32]], ptr [[TMP0]], align 1 +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV23:%.*]] = sext i8 [[TMP32]] to i32 +// CHECK23-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[CONV24:%.*]] = sext i8 [[TMP33]] to i32 +// CHECK23-NEXT: [[SUB25:%.*]] = sub i32 10, [[CONV24]] +// CHECK23-NEXT: [[SUB26:%.*]] = sub i32 [[SUB25]], 1 +// CHECK23-NEXT: [[ADD27:%.*]] = add i32 [[SUB26]], 1 +// CHECK23-NEXT: [[DIV28:%.*]] = udiv i32 [[ADD27]], 1 +// CHECK23-NEXT: [[MUL29:%.*]] = mul nsw i32 [[DIV28]], 1 +// CHECK23-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV23]], 
[[MUL29]] +// CHECK23-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK23-NEXT: store i8 [[CONV31]], ptr [[TMP2]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: // CHECK23-NEXT: br label [[OMP_PRECOND_END]] @@ -7632,17 +7907,20 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[AA_ADDR]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7652,73 +7930,75 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[CONV]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 99 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK23-NEXT: [[TMP12:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK23-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK23-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK23-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 100, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -179,25 +179,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -206,107 +212,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD9]], ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK1-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP23]], 
align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD5]], ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK1-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK1-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4, 
!llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -350,6 +358,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -363,20 +372,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -385,107 +399,109 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP5]], ptr [[G3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load volatile double, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store double [[TMP7]], ptr [[G14]], align 8 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store float [[TMP9]], ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: store double [[TMP10]], ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store double [[TMP12]], ptr [[G1]], align 8 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store float [[TMP14]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: br i1 [[CMP4]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP18]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD9]], ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD10:%.*]] = fadd double [[TMP20]], 1.000000e+00 -// CHECK3-NEXT: store volatile double [[ADD10]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 3 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP22]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV]], 4.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP23]], 
align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD5]], ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = load volatile double, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], 1.000000e+00 +// CHECK3-NEXT: store volatile double [[ADD6]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 3 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP27]] to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 4.000000e+00 +// CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float +// CHECK3-NEXT: store float [[CONV9]], ptr [[SFVAR]], align 4, 
!llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -682,6 +698,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -691,21 +708,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -714,124 +737,126 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 
+// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], 
label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr 
inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// 
CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1009,6 +1034,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1017,20 +1043,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1039,119 +1070,121 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done6: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i64 4, i1 false) -// CHECK9-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], 
align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// 
CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr 
inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1352,6 +1385,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1361,21 +1395,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// 
CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1384,122 +1424,124 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP8]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[SVAR9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[SVAR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// 
CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP21]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: 
-// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1677,6 +1719,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1685,20 +1728,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1707,117 +1755,119 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr 
[[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done6: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR7]], ptr align 4 [[TMP7]], i32 4, i1 false) -// CHECK11-NEXT: store ptr [[VAR7]], ptr [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VAR]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x 
i32], ptr [[VEC4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP8]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP12]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -171,25 +171,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 
[[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -198,106 +204,108 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 8, 
!llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // 
CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP29]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -339,6 +347,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // 
CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -352,20 +361,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -374,106 +388,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G3]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR7]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP23]], align 4, 
!llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // 
CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[G3]], align 8 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP26]], align 4 -// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK3-NEXT: store float [[TMP29]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -669,6 +685,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -678,21 +695,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -701,33 +724,35 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// 
CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -737,106 +762,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void 
@_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group 
[[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// 
CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: 
[[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[TMP34]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1014,6 +1039,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1022,20 +1048,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1044,30 +1075,32 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1077,104 +1110,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: 
.omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[TMP32]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1376,6 +1409,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1385,21 +1419,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1408,33 +1448,35 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1444,104 +1486,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// 
CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP9]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4, 
!llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP24]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: 
[[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done13: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr 
[[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[TMP34]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done15: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // @@ -1719,6 +1761,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1727,20 +1770,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1749,30 +1797,32 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: store ptr 
[[TMP4]], ptr [[_TMP1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1782,102 +1832,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// 
CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK11-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: 
store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 
[[TMP23]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK11-NEXT: 
store i32 [[TMP25]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[VEC4]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP31]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP10]], ptr align 4 [[TMP32]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// 
CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp @@ -131,15 +131,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -156,69 +158,71 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// 
CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -249,15 +253,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -274,69 +280,71 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// 
CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -408,83 +416,83 @@ // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 
8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[I_CASTED]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[I_CASTED]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP12]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP21]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP22]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP19]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes, ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP29]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP19]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] // CHECK9: omp_offload.failed4: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP13]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i64 [[TMP12]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT5]] // CHECK9: omp_offload.cont5: // CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_OFFLOAD_CONT5]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done6: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP33]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP30]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -513,15 +521,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -538,6 +548,8 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -555,72 +567,72 @@ // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group !6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group !6 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -643,76 +655,81 @@ // CHECK9-SAME: (i64 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group !12 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I1]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK9-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void @@ -722,62 +739,62 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// 
CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// 
CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -840,15 +857,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -858,96 +877,98 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 
x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 
[[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], 
i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group !15 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group !15 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: 
// CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] @@ -971,7 +992,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -984,7 +1005,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1037,83 +1058,83 @@ // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93() #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[I_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS3:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP19]], ptr [[TMP23]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes, ptr [[TMP25]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP28]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP29]], align 8 -// CHECK11-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP18]], align 4 +// CHECK11-NEXT: 
[[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP17]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS3]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP26]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102.region_id, ptr [[KERNEL_ARGS3]]) +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED4:%.*]], label [[OMP_OFFLOAD_CONT5:%.*]] // CHECK11: omp_offload.failed4: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP13]]) #[[ATTR4]] +// CHECK11-NEXT: call 
void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l102(i32 [[TMP12]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT5]] // CHECK11: omp_offload.cont5: // CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT5]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done6: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP33]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP30]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1142,15 +1163,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // 
CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1167,6 +1190,8 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1184,70 +1209,70 @@ // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: 
[[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group !7 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// 
CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP7]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 
[[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1270,76 +1295,81 @@ // CHECK11-SAME: (i32 noundef [[I:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I1]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 2, ptr [[TMP0]], align 4 +// CHECK11-NEXT: store i32 2, ptr [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: ret void @@ -1349,62 +1379,62 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], 
align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 2, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 2, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], 
[ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done2: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1467,15 +1497,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l49 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1485,94 +1517,96 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group !16 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// 
CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1596,7 +1630,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds 
[[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1609,7 +1643,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void @@ -1673,8 +1707,8 @@ // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 // CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -1689,43 +1723,43 @@ // CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// 
CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM7]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group !2 +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP6]], i64 4, i1 false), !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// 
CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] @@ -1733,44 +1767,44 @@ // CHECK13: arraydestroy.done11: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB14]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB15]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB14]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV16]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB14]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV16]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND18:%.*]] // CHECK13: omp.inner.for.cond18: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB15]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB15]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY20:%.*]], label [[OMP_INNER_FOR_END26:%.*]] // CHECK13: omp.inner.for.body20: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: 
[[MUL21:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK13-NEXT: store i32 [[ADD22]], ptr [[I17]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD22]], ptr [[I17]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE23:%.*]] // CHECK13: omp.body.continue23: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC24:%.*]] // CHECK13: omp.inner.for.inc24: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND18]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end26: // CHECK13-NEXT: store i32 2, ptr [[I12]], align 4 // CHECK13-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() // CHECK13-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN27]], i64 2 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN27]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY28:%.*]] // CHECK13: arraydestroy.body28: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST29:%.*]] = phi ptr [ [[TMP18]], [[OMP_INNER_FOR_END26]] ], [ [[ARRAYDESTROY_ELEMENT30:%.*]], [[ARRAYDESTROY_BODY28]] ] +// 
CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST29:%.*]] = phi ptr [ [[TMP15]], [[OMP_INNER_FOR_END26]] ], [ [[ARRAYDESTROY_ELEMENT30:%.*]], [[ARRAYDESTROY_BODY28]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT30]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST29]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT30]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE31:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT30]], [[ARRAY_BEGIN27]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE31]], label [[ARRAYDESTROY_DONE32:%.*]], label [[ARRAYDESTROY_BODY28]] // CHECK13: arraydestroy.done32: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK13-NEXT: ret i32 [[TMP19]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK13-NEXT: ret i32 [[TMP16]] // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1837,8 +1871,8 @@ // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 // CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -1853,43 +1887,43 @@ // CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group !9 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group !9 -// 
CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM7]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group !9 +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP6]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] @@ -1897,18 +1931,18 @@ // CHECK13: arraydestroy.done11: // CHECK13-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY13:%.*]] // CHECK13: arraydestroy.body13: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], [[ARRAYDESTROY_BODY13]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST14:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE11]] ], [ [[ARRAYDESTROY_ELEMENT15:%.*]], [[ARRAYDESTROY_BODY13]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT15]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST14]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT15]]) #[[ATTR4]] // 
CHECK13-NEXT: [[ARRAYDESTROY_DONE16:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT15]], [[ARRAY_BEGIN12]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE16]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY13]] // CHECK13: arraydestroy.done17: // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK13-NEXT: ret i32 [[TMP14]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK13-NEXT: ret i32 [[TMP11]] // // // CHECK13-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -2054,8 +2088,8 @@ // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 // CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2070,41 +2104,41 @@ // CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: 
[[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP8]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP7]], i32 4, i1 false), !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP5]] +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] 
+// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP6]], i32 4, i1 false), !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] @@ -2112,44 +2146,44 @@ // CHECK15: arraydestroy.done10: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB13]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB14]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB13]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV15]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB13]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV15]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND17:%.*]] // CHECK15: omp.inner.for.cond17: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB14]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[CMP18:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB14]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP18:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK15-NEXT: br i1 [[CMP18]], label [[OMP_INNER_FOR_BODY19:%.*]], label [[OMP_INNER_FOR_END25:%.*]] // CHECK15: omp.inner.for.body19: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[MUL20:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[MUL20:%.*]] = mul nsw i32 [[TMP13]], 1 // 
CHECK15-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] -// CHECK15-NEXT: store i32 [[ADD21]], ptr [[I16]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD21]], ptr [[I16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] // CHECK15: omp.body.continue22: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] // CHECK15: omp.inner.for.inc23: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV15]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV15]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV15]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND17]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end25: // CHECK15-NEXT: store i32 2, ptr [[I11]], align 4 // CHECK15-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK15-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN26]], i32 2 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN26]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY27:%.*]] // CHECK15: arraydestroy.body27: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST28:%.*]] = phi ptr [ [[TMP18]], [[OMP_INNER_FOR_END25]] ], [ [[ARRAYDESTROY_ELEMENT29:%.*]], [[ARRAYDESTROY_BODY27]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST28:%.*]] = phi ptr [ [[TMP15]], [[OMP_INNER_FOR_END25]] ], [ [[ARRAYDESTROY_ELEMENT29:%.*]], [[ARRAYDESTROY_BODY27]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT29]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST28]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT29]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE30:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT29]], [[ARRAY_BEGIN26]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE30]], label [[ARRAYDESTROY_DONE31:%.*]], label [[ARRAYDESTROY_BODY27]] // CHECK15: arraydestroy.done31: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK15-NEXT: ret i32 [[TMP19]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK15-NEXT: ret i32 [[TMP16]] // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -2216,8 +2250,8 @@ // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 // CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -2232,41 +2266,41 @@ // CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK15-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP8]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP7]], i32 4, i1 false), !llvm.access.group !10 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP10]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP5]] +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP6]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: 
arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] @@ -2274,18 +2308,18 @@ // CHECK15: arraydestroy.done10: // CHECK15-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY12:%.*]] // CHECK15: arraydestroy.body12: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST13:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE10]] ], [ [[ARRAYDESTROY_ELEMENT14:%.*]], [[ARRAYDESTROY_BODY12]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT14]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST13]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT14]]) #[[ATTR4]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE15:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT14]], [[ARRAY_BEGIN11]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY12]] // 
CHECK15: arraydestroy.done16: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK15-NEXT: ret i32 [[TMP14]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK15-NEXT: ret i32 [[TMP11]] // // // CHECK15-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev diff --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -97,34 +97,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: 
[[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: 
br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: 
br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -137,88 +137,93 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], 
align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: 
store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -234,41 +239,41 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 
1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -278,88 +283,93 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: 
store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 
[[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -385,34 +395,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// 
CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 
+// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -425,88 +435,93 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // 
CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], 
align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 
[[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -522,41 +537,41 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = 
load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -566,88 +581,93 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD]], 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] 
// CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -676,26 +696,26 @@ // CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 @@ -722,37 +742,37 @@ // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: 
[[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK5-NEXT: ret i32 0 // @@ -775,26 +795,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store 
i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -821,37 +841,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], 
ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group 
[[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -870,92 +890,97 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 
0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull 
align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_firstprivate_codegen.cpp b/clang/test/OpenMP/for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/for_firstprivate_codegen.cpp @@ -418,6 +418,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -426,22 +427,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull 
align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -510,14 +519,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -526,121 +532,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR7:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP8:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 
+// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC4]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef [[AGG_TMP8]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP8]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR7]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM11]] -// CHECK1-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX12]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP23]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR7]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done15: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP25]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: 
arraydestroy.done11: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -858,11 +866,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -873,78 +881,80 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G1]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SIVAR3]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 2, ptr [[TMP15]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -1100,18 +1110,21 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1122,57 +1135,59 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// 
CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[G1]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[G1]], align 4 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp 
sgt i32 [[TMP9]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// 
CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1184,30 +1199,30 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP14]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP15]], ptr [[BLOCK_CAPTURED5]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// 
CHECK4-NEXT: call void [[TMP18]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP16:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP16]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK4-NEXT: call void [[TMP20]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB2:[0-9]+]], i32 [[TMP8]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_lastprivate_codegen.cpp b/clang/test/OpenMP/for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/for_lastprivate_codegen.cpp @@ -369,6 +369,10 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(24) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -379,26 +383,36 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -438,44 
+452,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -485,102 +497,102 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// 
CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done8: +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done16: -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -595,10 +607,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -610,74 +623,77 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr @f, align 4 -// CHECK1-NEXT: store float [[TMP0]], ptr [[F]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr @f, align 4 +// CHECK1-NEXT: store float [[TMP1]], ptr [[F]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[F]], align 4 +// CHECK1-NEXT: store float [[TMP15]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -687,76 +703,79 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr @f, align 4 -// CHECK1-NEXT: store float [[TMP2]], ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr @f, align 4 +// CHECK1-NEXT: store float [[TMP3]], ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK1-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// 
CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr 
[[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -767,70 +786,72 @@ // CHECK1-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 1, ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 1, ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: store float 0.000000e+00, ptr [[F]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 
-// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: // CHECK1-NEXT: store i8 2, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP12]], ptr @cnt, align 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[F]], align 4 -// CHECK1-NEXT: store float [[TMP13]], ptr @f, align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], ptr @cnt, align 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 +// CHECK1-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -838,37 +859,46 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = 
alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 128 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 128 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..5, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 128 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] 
+// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP3]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -876,6 +906,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -899,7 +930,9 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -907,44 +940,44 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], 
[[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK1-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[BF_LOAD7:%.*]] = load i8, ptr [[B6]], align 8 // CHECK1-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK1-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK1-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK1-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK1-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B6]], align 8 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // 
CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], ptr [[B6]], align 8 @@ -952,16 +985,16 @@ // CHECK1-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK1-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK1-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[C11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C11]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP13]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -973,11 +1006,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1001,27 +1034,29 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], 
align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK1-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -1095,7 +1130,7 @@ // CHECK1-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 // CHECK1-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 // CHECK1-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 // CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 
[[TMP35]], 15 @@ -1176,14 +1211,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1192,133 +1224,135 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: 
[[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], 
i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP1]], ptr align 128 [[VEC4]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 128 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_4]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: 
[[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP28]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP30]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN16]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done17: -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP33]]) // CHECK1-NEXT: ret void // // @@ -1338,7 +1372,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -1347,6 +1381,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1361,7 +1396,9 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 
0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1369,41 +1406,41 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: @@ -1415,11 +1452,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -1433,71 +1470,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store 
ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP13]], align 4 // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1509,7 +1548,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1556,6 +1595,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1580,7 +1620,9 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1588,43 +1630,43 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE0_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: @@ -1636,11 +1678,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1665,27 +1707,29 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: 
[[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 
+// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK3-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -1721,7 +1765,7 @@ // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP10]], align 8 // CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 @@ -1759,7 +1803,7 @@ // CHECK3-NEXT: [[TMP34:%.*]] = load [4 x i8], ptr [[TMP33]], align 1 // CHECK3-NEXT: store [4 x i8] [[TMP34]], ptr [[TMP12]], align 1 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 // CHECK3-NEXT: [[TMP36:%.*]] = trunc i32 [[TMP35]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 // CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP36]], 15 @@ -1776,6 +1820,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 
0, i32 0 @@ -1795,13 +1840,21 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -1809,6 +1862,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1838,21 +1892,22 @@ // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 2 // CHECK3-NEXT: store i32 [[MUL]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], ptr [[TMP9]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1863,111 +1918,112 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[A5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store 
ptr [[C]], ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br 
i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP24]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr 
[[TMP6]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1981,95 +2037,97 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: 
store ptr [[TMP2]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] 
// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[B3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load 
i32, ptr [[B3]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B3]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP16]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP17]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne 
i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: // CHECK3-NEXT: store i32 2, ptr [[B3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B3]], align 4 -// CHECK3-NEXT: store i32 [[TMP19]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[B3]], align 4 -// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i32 [[TMP23]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B7]], align 8 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP21]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP24]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], ptr [[B7]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2080,86 +2138,88 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr @g1, align 8 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
-// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr 
[[SIVAR3]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) 
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 128 -// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr @g, align 128 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP21]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 128 +// CHECK3-NEXT: store volatile i32 [[TMP21]], ptr @g, align 128 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP23]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2184,9 +2244,9 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 // CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 // CHECK4-NEXT: store i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK4-NEXT: call void [[TMP4]](ptr noundef [[BLOCK]]) +// 
CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void [[TMP2]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret i32 0 // // @@ -2208,18 +2268,21 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2230,55 +2293,57 @@ // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr 
@g1, align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr @g1, align 8 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP11]], align 4 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR3]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile i32 1, ptr [[TMP13]], align 4 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// 
CHECK4-NEXT: store volatile i32 1, ptr [[TMP14]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2290,43 +2355,43 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP13]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 32 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[BLOCK_CAPTURED6]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP15]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, 
ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP16]], ptr [[BLOCK_CAPTURED4]], align 32 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32, [84 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], ptr [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK4-NEXT: call void [[TMP19]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK4-NEXT: br i1 
[[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP24]], ptr @g, align 128 -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR3]], align 4 -// CHECK4-NEXT: store i32 [[TMP27]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP23]], ptr @g, align 128 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // @@ -2352,6 +2417,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2376,7 +2442,9 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -2384,31 +2452,31 @@ // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], 
ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2422,17 +2490,17 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 5 // CHECK4-NEXT: store ptr [[THIS1]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store ptr 
[[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK4-NEXT: call void [[TMP13]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: @@ -2444,11 +2512,11 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -2473,27 +2541,29 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, 
ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// 
CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK4-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK4-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -2539,7 +2609,7 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 // CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP10]], align 8 // CHECK4-NEXT: store ptr [[TMP19]], ptr [[BLOCK_CAPTURED]], align 8 @@ -2549,41 +2619,41 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED16:%.*]] = getelementptr 
inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 // CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP13]], align 8 // CHECK4-NEXT: store ptr [[TMP21]], ptr [[BLOCK_CAPTURED16]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK4-NEXT: call void [[TMP25]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK4-NEXT: call void [[TMP23]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK4-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: // CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK4-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP30:%.*]] = load 
ptr, ptr [[_TMP10]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK4-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK4-NEXT: store i32 [[TMP29]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP13]], align 8 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK4-NEXT: store i32 [[TMP32]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK4-NEXT: store i32 [[TMP34]], ptr [[TMP11]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: [[TMP36:%.*]] = load [4 x i8], ptr [[TMP35]], align 1 -// CHECK4-NEXT: store [4 x i8] [[TMP36]], ptr [[TMP12]], align 1 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP38:%.*]] = trunc i32 [[TMP37]] to i8 +// CHECK4-NEXT: store i32 [[TMP31]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 +// CHECK4-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 +// CHECK4-NEXT: [[B18:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B18]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP38]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // 
CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], ptr [[B18]], align 8 @@ -2598,6 +2668,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2616,24 +2687,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 +// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2644,101 +2720,103 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 
// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A5:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B7:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[A5]], ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store 
ptr [[C]], ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: br 
i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP18]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP23]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP25]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP24]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK4-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP25]], ptr [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[B7]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK4-NEXT: store i32 [[TMP28]], ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK4-NEXT: store i32 [[TMP30]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP31]], ptr 
[[TMP6]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: store i32 [[TMP33]], ptr [[TMP12]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP14]]) // CHECK4-NEXT: ret void // // @@ -2747,6 +2825,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2776,19 +2855,22 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, ptr [[THIS]], ptr [[TMP5]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR2]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2802,83 +2884,85 @@ // CHECK4-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store ptr [[_TMP2]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: 
cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP11]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = 
add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 2 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B]], align 8 // CHECK4-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD]], 4 // CHECK4-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK4-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK4-NEXT: [[TMP14:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK4-NEXT: [[TMP17:%.*]] = trunc i32 [[DEC]] to i8 // CHECK4-NEXT: [[BF_LOAD6:%.*]] = load i8, ptr [[B]], align 8 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP14]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP17]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD6]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], ptr [[B]], align 8 // CHECK4-NEXT: [[BF_RESULT_SHL:%.*]] = shl i8 [[BF_VALUE]], 4 // CHECK4-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK4-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 
4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP7]]) // CHECK4-NEXT: ret void // // @@ -2892,6 +2976,10 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(24) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -2902,26 +2990,36 @@ // CHECK5-NEXT: 
[[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..3) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_3]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done4: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -2961,44 +3059,42 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// 
CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -3008,102 +3104,102 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP16]] -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 
0, i64 [[IDXPROM2]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = 
icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done8: +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// 
CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP31]], ptr [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) +// CHECK5-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP34]]) // CHECK5-NEXT: ret void // // @@ -3118,10 +3214,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3133,74 +3230,77 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load float, ptr @f, align 4 -// CHECK5-NEXT: store float [[TMP0]], ptr [[F]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load float, ptr @f, align 4 +// CHECK5-NEXT: store float [[TMP1]], ptr [[F]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK5-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[F]], align 4 -// CHECK5-NEXT: store float [[TMP14]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK5-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[F]], align 4 +// CHECK5-NEXT: store float [[TMP15]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3210,76 +3310,79 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: [[TMP2:%.*]] = load float, ptr @f, align 4 -// CHECK5-NEXT: store float [[TMP2]], ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTF__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: [[TMP3:%.*]] = load float, ptr @f, align 4 +// CHECK5-NEXT: store float [[TMP3]], ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK5-NEXT: store double [[INC]], ptr @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 -// CHECK5-NEXT: store float 
[[TMP13]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[DOTF__VOID_ADDR]], align 4 +// CHECK5-NEXT: store float [[TMP14]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTF__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3290,88 +3393,90 @@ // CHECK5-NEXT: [[CNT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 1, ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, ptr [[TMP2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 0 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[DOTCNT__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 1, ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[F]], i32 0, i32 0 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br 
label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP10:%.*]] = load double, ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 +// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 // CHECK5-NEXT: store double 
[[INC]], ptr @_ZN1A1xE, align 8 -// CHECK5-NEXT: store float 0.000000e+00, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP1]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp sle i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: br i1 [[TMP13]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: store float 0.000000e+00, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP11]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP14]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP1]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: store i8 2, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP18]], ptr @cnt, align 1 -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: store float [[TMP19]], ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK5-NEXT: store float [[TMP20]], ptr @f, align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCNT__VOID_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP19]], ptr @cnt, align 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: store float [[TMP20]], ptr [[TMP4]], align 4 
+// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store float [[TMP21]], ptr @f, align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTCNT__VOID_ADDR]], ptr inttoptr (i64 3 to ptr)) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -3379,37 +3484,46 @@ // CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK5-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 128 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 128 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 128 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..5, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 128 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP3]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -3417,6 +3531,7 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 
8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3440,7 +3555,9 @@ // CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[THIS1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -3448,44 +3565,44 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = 
load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK5-NEXT: [[B6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK5-NEXT: [[BF_LOAD7:%.*]] = load i8, ptr [[B6]], align 8 // CHECK5-NEXT: [[BF_SHL:%.*]] = shl i8 [[BF_LOAD7]], 4 // CHECK5-NEXT: [[BF_ASHR:%.*]] = ashr i8 [[BF_SHL]], 4 // CHECK5-NEXT: [[BF_CAST:%.*]] = sext i8 [[BF_ASHR]] to i32 // CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[BF_CAST]], -1 -// CHECK5-NEXT: [[TMP11:%.*]] = trunc i32 [[DEC]] to i8 +// CHECK5-NEXT: [[TMP12:%.*]] = trunc i32 [[DEC]] to i8 // CHECK5-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B6]], align 8 -// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP11]], 15 +// CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP12]], 15 // CHECK5-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK5-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK5-NEXT: store i8 [[BF_SET10]], ptr [[B6]], align 8 @@ -3493,16 +3610,16 @@ // CHECK5-NEXT: [[BF_RESULT_ASHR:%.*]] = ashr i8 [[BF_RESULT_SHL]], 4 // CHECK5-NEXT: [[BF_RESULT_CAST:%.*]] = sext i8 [[BF_RESULT_ASHR]] to i32 // CHECK5-NEXT: [[C11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[C11]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[C11]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// 
CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -3514,11 +3631,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[E:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -3542,27 +3659,29 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: 
[[E1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 // CHECK5-NEXT: store ptr [[E1]], ptr [[E]], align 8 -// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[A2]], ptr [[A]], align 8 -// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C3]], align 8 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK5-NEXT: store ptr [[TMP4]], ptr [[_TMP5]], align 8 +// CHECK5-NEXT: [[C3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C3]], align 8 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[_TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP5]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP5]], align 8 
+// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[E7]], ptr align 1 [[TMP7]], i64 4, i1 false) // CHECK5-NEXT: store ptr [[E7]], ptr [[_TMP8]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 @@ -3636,7 +3755,7 @@ // CHECK5-NEXT: [[TMP33:%.*]] = load [4 x i8], ptr [[TMP32]], align 1 // CHECK5-NEXT: store [4 x i8] [[TMP33]], ptr [[TMP12]], align 1 // CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[B11]], align 4 -// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[B16:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 // CHECK5-NEXT: [[TMP35:%.*]] = trunc i32 [[TMP34]] to i8 // CHECK5-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B16]], align 8 // CHECK5-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP35]], 15 @@ -3717,14 +3836,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // 
CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3733,133 +3849,135 @@ // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 128 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 128 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 128 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds 
[[STRUCT_S_4]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// 
CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 
[[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: 
store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 128 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP0]], align 128 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP1]], ptr align 128 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP2]], align 128 +// CHECK5-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 128 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_4]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP29]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP28]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] 
], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_4]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP30]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN16:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN16]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN12]], i64 2 
// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN16]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE17:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done17: -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP31]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP33]]) // CHECK5-NEXT: ret void // // @@ -3879,7 +3997,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds 
[[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -3888,6 +4006,7 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3902,7 +4021,9 @@ // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[THIS1]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -3910,41 +4031,41 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP1]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP2]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP4]], 
[[TMP5]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: @@ -3956,11 +4077,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3974,71 +4095,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK5-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP13]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store 
i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: store i32 [[TMP17]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP4]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK5-NEXT: ret void // // @@ -4050,7 +4173,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 
// CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/for_linear_codegen.cpp b/clang/test/OpenMP/for_linear_codegen.cpp --- a/clang/test/OpenMP/for_linear_codegen.cpp +++ b/clang/test/OpenMP/for_linear_codegen.cpp @@ -192,18 +192,23 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 // CHECK1-NEXT: store i64 0, ptr [[LVAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[PVAR]], ptr [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -230,12 +235,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8 @@ -245,112 +249,119 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTLINEAR_START1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP5]], i64 8, ptr inttoptr (i64 5 to ptr)) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 
[[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) +// CHECK1-NEXT: [[DOTLVAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP8]], i64 8, ptr inttoptr (i64 5 to ptr)) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP13]], 3 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL4]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP15]], 3 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP14]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[ADD_PTR7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 3 -// CHECK1-NEXT: store ptr [[ADD_PTR7]], ptr 
[[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP17]], 3 -// CHECK1-NEXT: store i64 [[ADD8]], ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP17]], [[CONV]] +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 3 +// CHECK1-NEXT: store ptr [[ADD_PTR6]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3 +// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTLVAR__VOID_ADDR]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTLVAR__VOID_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP4]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP5]], ptr [[DOTLVAR__VOID_ADDR]], ptr inttoptr (i64 5 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP8]], ptr [[DOTLVAR__VOID_ADDR]], ptr inttoptr (i64 5 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca 
[[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 -// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0 +// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F1]], ptr [[LVAR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LVAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[PVAR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // @@ -370,6 +381,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -383,16 +395,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -417,124 +431,126 @@ // CHECK1-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C2]], align 8 -// 
CHECK1-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK1-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: 
[[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK1-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP25]], -1 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// 
CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP31]], ptr [[_TMP20]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP33]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK1-NEXT: store i32 [[TMP34]], ptr [[B]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr 
[[TMP35]], ptr [[_TMP21]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[B]], align 4 -// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP39:%.*]] = trunc i32 [[TMP38]] to i8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 +// CHECK1-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP37]], ptr [[_TMP21]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP21]], align 8 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP40]] to i8 // CHECK1-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B22]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP39]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP41]], 15 // CHECK1-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK1-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET]], ptr [[B22]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 
[[TMP12]]) // CHECK1-NEXT: ret void // // @@ -570,12 +586,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -587,101 +602,103 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP10:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: store ptr [[LVAR5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL8]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTLINEAR_START3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], [[MUL9]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 1 -// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR4]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP6]], align 
8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL6]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP22]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR4]], align 8 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP12]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[LVAR5]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP12]], align 8 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[_TMP10]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP30]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -701,7 +718,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -710,20 +727,23 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -739,81 +759,83 @@ // CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store ptr [[A3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[MUL6]] +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP16]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP9]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: ret void // // @@ -865,6 +887,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -878,16 +901,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -913,124 +938,126 @@ // CHECK3-NEXT: [[_TMP21:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[C2]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK3-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK3-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw 
i32 [[TMP20]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK3-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B9]], ptr [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: 
[[TMP27:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B9]], ptr [[TMP28]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store 
ptr [[TMP32]], ptr [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: store i32 [[TMP35]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP36]], ptr [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP37]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP40:%.*]] = trunc i32 [[TMP39]] to i8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP34]], ptr [[_TMP20]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP20]], align 8 +// CHECK3-NEXT: store i32 [[TMP35]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP38]], ptr [[_TMP21]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP21]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[B22:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP42:%.*]] = trunc i32 [[TMP41]] to i8 // CHECK3-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B22]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP40]], 15 +// CHECK3-NEXT: 
[[BF_VALUE:%.*]] = and i8 [[TMP42]], 15 // CHECK3-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK3-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET]], ptr [[B22]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -1038,6 +1065,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1057,25 +1085,30 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1090,136 +1123,139 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP11:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[_TMP20:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[_TMP21:%.*]] = alloca ptr, align 
8 +// CHECK3-NEXT: [[_TMP17:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[_TMP18:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_START6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP7]], align 8 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP8]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK3-NEXT: store i32 [[ADD13]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK3-NEXT: store i32 [[ADD15]], ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// 
CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP33]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK3-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK3-NEXT: 
store i32 [[TMP36]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP37]], ptr [[_TMP21]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP21]], align 8 -// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP38]], ptr [[_TMP17]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP17]], align 8 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: store i32 [[TMP41]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP42]], ptr [[_TMP18]], align 8 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load ptr, ptr [[_TMP18]], align 8 +// CHECK3-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1233,98 +1269,100 @@ // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: [[_TMP12:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK3-NEXT: store i32 [[ADD9]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load volatile i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK3-NEXT: store volatile i32 [[ADD10]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK3-NEXT: store volatile i32 [[ADD10]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD11:%.*]] = add 
nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr @g, align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[_TMP12]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP12]], align 8 -// CHECK3-NEXT: store volatile i32 [[TMP26]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr @g, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP26]], ptr [[_TMP12]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP12]], align 8 +// CHECK3-NEXT: store volatile i32 [[TMP27]], ptr [[TMP28]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) 
// CHECK3-NEXT: ret void // // @@ -1358,17 +1396,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1386,65 +1426,67 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START2]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP12]], 5 -// 
CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], [[MUL5]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], [[MUL5]] // CHECK4-NEXT: store i32 [[ADD6]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP14]], 5 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], [[MUL7]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START2]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[MUL7]] // CHECK4-NEXT: store i32 [[ADD8]], ptr [[G1]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 5 // CHECK4-NEXT: store i32 [[ADD9]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load volatile i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 5 -// CHECK4-NEXT: store volatile i32 [[ADD10]], ptr [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load volatile i32, ptr [[TMP17]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 5 +// CHECK4-NEXT: store volatile i32 [[ADD10]], ptr [[TMP17]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store volatile i32 5, ptr [[TMP18]], align 4 +// CHECK4-NEXT: 
[[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store volatile i32 5, ptr [[TMP19]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1456,40 +1498,40 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP19:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP19]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP20]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED11:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP20]], ptr [[BLOCK_CAPTURED11]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[BLOCK_CAPTURED11]], align 8 // CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK4-NEXT: call void [[TMP24]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK4-NEXT: call void [[TMP23]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // 
CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK4-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK4-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK4-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 [[TMP29]], ptr @g, align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP30]], ptr [[_TMP13]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK4-NEXT: store volatile i32 [[TMP31]], ptr [[TMP32]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: store i32 [[TMP27]], ptr @g, align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP28]], ptr [[_TMP13]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[_TMP13]], align 8 +// CHECK4-NEXT: store volatile i32 [[TMP29]], ptr [[TMP30]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) // CHECK4-NEXT: ret void // // @@ -1513,6 +1555,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1526,16 +1569,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C:%.*]] = alloca ptr, align 8 @@ -1561,73 +1606,75 @@ // CHECK4-NEXT: [[_TMP23:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A1]], ptr [[A]], align 8 -// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C2]], align 8 -// 
CHECK4-NEXT: store ptr [[TMP1]], ptr [[C]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C2]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[C]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 // CHECK4-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP19]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: 
[[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] +// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP21]], [[MUL17]] +// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] +// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] // CHECK4-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 @@ -1640,58 +1687,58 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: store ptr [[TMP23]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr 
[[_TMP8]], align 8 +// CHECK4-NEXT: store ptr [[TMP25]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED19:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP24]], ptr [[BLOCK_CAPTURED19]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[BLOCK_CAPTURED19]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED20:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK4-NEXT: store ptr [[TMP25]], ptr [[BLOCK_CAPTURED20]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP27]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP11]], align 8 +// CHECK4-NEXT: store ptr [[TMP27]], ptr [[BLOCK_CAPTURED20]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 // CHECK4-NEXT: call void [[TMP29]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: 
-// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK4-NEXT: br i1 [[TMP33]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK4-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP34]], ptr [[_TMP22]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP22]], align 8 -// CHECK4-NEXT: store i32 [[TMP35]], ptr [[TMP36]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: store i32 [[TMP37]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP38]], ptr [[_TMP23]], align 8 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP23]], align 8 -// CHECK4-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP42:%.*]] = trunc i32 [[TMP41]] to i8 +// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP33]], ptr [[_TMP22]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP22]], align 8 +// CHECK4-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr 
[[B9]], align 4 +// CHECK4-NEXT: store i32 [[TMP36]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP37]], ptr [[_TMP23]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP23]], align 8 +// CHECK4-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[B24:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP41:%.*]] = trunc i32 [[TMP40]] to i8 // CHECK4-NEXT: [[BF_LOAD:%.*]] = load i8, ptr [[B24]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP42]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP41]], 15 // CHECK4-NEXT: [[BF_CLEAR:%.*]] = and i8 [[BF_LOAD]], -16 // CHECK4-NEXT: [[BF_SET:%.*]] = or i8 [[BF_CLEAR]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET]], ptr [[B24]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: ret void // // @@ -1700,6 +1747,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -1718,24 +1766,29 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], 
ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 +// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1750,128 +1803,130 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A7:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C10:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP11:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[_TMP20:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[_TMP21:%.*]] = alloca ptr, align 
8 +// CHECK4-NEXT: [[_TMP17:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[_TMP18:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
+// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_START6]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK4-NEXT: store ptr [[A7]], ptr [[_TMP8]], align 8 -// CHECK4-NEXT: store ptr [[C10]], ptr [[_TMP11]], align 8 -// CHECK4-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP7]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP8]], align 8 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP19]], [[MUL13]] -// CHECK4-NEXT: store i32 [[ADD14]], ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL15:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[MUL15]] -// CHECK4-NEXT: store i32 [[ADD16]], ptr [[B9]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], [[MUL17]] -// CHECK4-NEXT: store i32 [[ADD18]], ptr [[C10]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK4-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP25]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP27]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B9]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP11]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP29]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP28]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[MUL10]] +// CHECK4-NEXT: store i32 [[ADD11]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTLINEAR_START5]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], [[MUL12]] +// CHECK4-NEXT: store i32 [[ADD13]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTLINEAR_START6]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP29]], 1 +// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP28]], [[MUL14]] +// CHECK4-NEXT: store i32 [[ADD15]], ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP7]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP30]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP32]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// 
CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP8]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP34]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP33]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK4-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK4-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK4-NEXT: br i1 [[TMP32]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK4-NEXT: br i1 [[TMP37]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP33]], ptr [[_TMP20]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[A7]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP20]], align 8 -// CHECK4-NEXT: store i32 [[TMP34]], ptr [[TMP35]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK4-NEXT: 
store i32 [[TMP36]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP37]], ptr [[_TMP21]], align 8 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[C10]], align 4 -// CHECK4-NEXT: [[C22:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[C22]], align 8 -// CHECK4-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP38]], ptr [[_TMP17]], align 8 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load ptr, ptr [[_TMP17]], align 8 +// CHECK4-NEXT: store i32 [[TMP39]], ptr [[TMP40]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP41]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP42]], ptr [[_TMP18]], align 8 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[C19:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP44:%.*]] = load ptr, ptr [[C19]], align 8 +// CHECK4-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/for_private_codegen.cpp b/clang/test/OpenMP/for_private_codegen.cpp --- a/clang/test/OpenMP/for_private_codegen.cpp +++ b/clang/test/OpenMP/for_private_codegen.cpp @@ -116,7 +116,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -126,23 +128,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -169,10 +171,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -189,6 +192,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -206,73 +211,73 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// 
CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -287,10 +292,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -301,50 +307,52 @@ // CHECK1-NEXT: [[I1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -352,34 +360,35 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x 
%struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -440,10 +449,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -453,96 +463,98 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], 
i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -562,7 +574,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -575,7 +587,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -601,10 +613,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -621,67 +634,69 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 // CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP14]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -700,17 +715,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -727,42 +744,44 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 // CHECK4-NEXT: store i32 2, ptr [[SVAR]], align 4 // CHECK4-NEXT: store float 3.000000e+00, ptr [[SFVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 0 @@ -776,33 +795,33 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP10]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP11]], ptr [[BLOCK_CAPTURED4]], align 8 // CHECK4-NEXT: 
[[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[BLOCK_CAPTURED5]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr, i32, float }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load float, ptr [[SFVAR]], align 4 -// CHECK4-NEXT: store float [[TMP12]], ptr [[BLOCK_CAPTURED6]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK4-NEXT: store float [[TMP13]], ptr [[BLOCK_CAPTURED6]], align 4 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK4-NEXT: call void [[TMP16]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 
[[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/for_reduction_codegen.cpp b/clang/test/OpenMP/for_reduction_codegen.cpp --- a/clang/test/OpenMP/for_reduction_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_codegen.cpp @@ -542,8 +542,21 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_16:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_17:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_18:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_19:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_20:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_21:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 @@ -581,70 +594,124 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 6, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[TMP1]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 10, [[TMP3]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 16 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 10, i64 [[TMP3]], ptr [[VLA]], ptr [[VEC]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 10, i64 [[TMP3]], ptr [[VLA]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, i64 10, i64 [[TMP3]], ptr [[VLA]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[VVAR2]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[TMP7]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[TMP9]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP10]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 10, [[TMP8]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_16]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_16]]) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_17]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_17]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_18]]) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_19]]) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_20]]) +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_21]]) +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL22]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP36]]) +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi ptr [ [[TMP12]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] -// CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN19]], i64 4 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] -// CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP13]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN25]], i64 40 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// 
CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi ptr [ [[TMP38]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN31:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN31]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY32:%.*]] +// CHECK1: arraydestroy.body32: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST33:%.*]] = phi ptr [ [[TMP39]], [[ARRAYDESTROY_DONE30]] ], [ [[ARRAYDESTROY_ELEMENT34:%.*]], [[ARRAYDESTROY_BODY32]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT34]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST33]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT34]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE35:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT34]], [[ARRAY_BEGIN31]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE35]], label [[ARRAYDESTROY_DONE36:%.*]], label [[ARRAYDESTROY_BODY32]] +// CHECK1: arraydestroy.done36: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: 
[[TMP40:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP40]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -671,16 +738,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[S_ARR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -689,198 +751,200 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = 
alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP23:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[_TMP31:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr 
[[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store float 
0x47EFFFFFE0000000, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // 
CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float 
[[TMP23]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP25]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x 
ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR16]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR17]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = fadd float [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store float [[ADD12]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store float [[ADD8]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = fcmp une float 
[[CALL14]], 0.000000e+00 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP37:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV16:%.*]] = uitofp i1 [[TMP37]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV16]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV12:%.*]] = uitofp i1 [[TMP35]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV12]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP17:%.*]] = fcmp olt float [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK1: cond.true18: -// CHECK1-NEXT: [[TMP42:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END20:%.*]] -// CHECK1: cond.false19: -// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END20]] -// CHECK1: cond.end20: -// CHECK1-NEXT: [[COND21:%.*]] = phi float [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] -// CHECK1-NEXT: store float [[COND21]], ptr [[TMP3]], align 4 -// 
CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP13:%.*]] = fcmp olt float [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK1: cond.true14: +// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END16:%.*]] +// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END16]] +// CHECK1: cond.end16: +// CHECK1-NEXT: [[COND17:%.*]] = phi float [ [[TMP38]], [[COND_TRUE14]] ], [ [[TMP39]], [[COND_FALSE15]] ] +// CHECK1-NEXT: store float [[COND17]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP44]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL22]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL24:%.*]] = call noundef float 
@_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL25:%.*]] = fcmp une float [[CALL24]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL25]], label [[LAND_RHS26:%.*]], label [[LAND_END29:%.*]] -// CHECK1: land.rhs26: -// CHECK1-NEXT: [[CALL27:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL28:%.*]] = fcmp une float [[CALL27]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END29]] -// CHECK1: land.end29: -// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL28]], [[LAND_RHS26]] ] -// CHECK1-NEXT: [[CONV30:%.*]] = uitofp i1 [[TMP48]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]], float noundef [[CONV30]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP23]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP23]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP3]] monotonic, align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw fadd ptr [[TMP2]], float [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL18:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL18]], i64 4, i1 false) +// CHECK1-NEXT: call void 
@__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL21]], label [[LAND_RHS22:%.*]], label [[LAND_END25:%.*]] +// CHECK1: land.rhs22: +// CHECK1-NEXT: [[CALL23:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL24:%.*]] = fcmp une float [[CALL23]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END25]] +// CHECK1: land.end25: +// CHECK1-NEXT: [[TMP42:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL24]], [[LAND_RHS22]] ] +// CHECK1-NEXT: [[CONV26:%.*]] = uitofp i1 [[TMP42]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]], float noundef [[CONV26]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP19]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP8]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP53:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END29]] ], [ [[TMP63:%.*]], [[COND_END35:%.*]] ] -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast i32 [[TMP53]] to float -// CHECK1-NEXT: store float [[TMP55]], ptr [[_TMP31]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load float, ptr [[_TMP31]], align 
4 -// CHECK1-NEXT: [[TMP57:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP32:%.*]] = fcmp olt float [[TMP56]], [[TMP57]] -// CHECK1-NEXT: br i1 [[CMP32]], label [[COND_TRUE33:%.*]], label [[COND_FALSE34:%.*]] -// CHECK1: cond.true33: -// CHECK1-NEXT: [[TMP58:%.*]] = load float, ptr [[_TMP31]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: cond.false34: -// CHECK1-NEXT: [[TMP59:%.*]] = load float, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END35]] -// CHECK1: cond.end35: -// CHECK1-NEXT: [[COND36:%.*]] = phi float [ [[TMP58]], [[COND_TRUE33]] ], [ [[TMP59]], [[COND_FALSE34]] ] -// CHECK1-NEXT: store float [[COND36]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = cmpxchg ptr [[TMP3]], i32 [[TMP53]], i32 [[TMP60]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP63]] = extractvalue { i32, i1 } [[TMP62]], 0 -// CHECK1-NEXT: [[TMP64:%.*]] = extractvalue { i32, i1 } [[TMP62]], 1 -// CHECK1-NEXT: br i1 [[TMP64]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP44:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END25]] ], [ [[TMP52:%.*]], [[COND_END31:%.*]] ] +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i32 [[TMP44]] to float +// CHECK1-NEXT: store float [[TMP45]], ptr [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load float, ptr [[_TMP27]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP28:%.*]] = fcmp olt float [[TMP46]], [[TMP47]] +// CHECK1-NEXT: br i1 [[CMP28]], label [[COND_TRUE29:%.*]], label [[COND_FALSE30:%.*]] +// CHECK1: cond.true29: +// CHECK1-NEXT: [[TMP48:%.*]] = load float, ptr [[_TMP27]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.false30: +// CHECK1-NEXT: [[TMP49:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END31]] +// CHECK1: cond.end31: +// CHECK1-NEXT: [[COND32:%.*]] = phi float [ [[TMP48]], 
[[COND_TRUE29]] ], [ [[TMP49]], [[COND_FALSE30]] ] +// CHECK1-NEXT: store float [[COND32]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP51:%.*]] = cmpxchg ptr [[TMP8]], i32 [[TMP44]], i32 [[TMP50]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP52]] = extractvalue { i32, i1 } [[TMP51]], 0 +// CHECK1-NEXT: [[TMP53:%.*]] = extractvalue { i32, i1 } [[TMP51]], 1 +// CHECK1-NEXT: br i1 [[TMP53]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP9]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -893,55 +957,55 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds 
[4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: 
[[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float 
@_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP34]] to float +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP22]] to float // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP22]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP19]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP17]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// 
CHECK1-NEXT: store float [[COND]], ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP19]], align 4 // CHECK1-NEXT: ret void // // @@ -976,15 +1040,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -998,232 +1058,234 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr 
[[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 
[[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// 
CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX8]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN11:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN11]] -// CHECK1-NEXT: [[ARRAYDECAY13:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX12]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY13]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX14]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[VLA15:%.*]] = alloca [[STRUCT_S]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], ptr [[__VLA_EXPR1]], align 8 
-// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY16:%.*]] = icmp eq ptr [[VLA15]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY16]], label [[OMP_ARRAYINIT_DONE21:%.*]], label [[OMP_ARRAYINIT_BODY17:%.*]] -// CHECK1: omp.arrayinit.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY17]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYINIT_DONE21]], label [[OMP_ARRAYINIT_BODY17]] -// CHECK1: omp.arrayinit.done21: -// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP35]] -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP39]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP40]], 9 +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 
+// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX5]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[TMP10]], i64 0, i64 [[LB_ADD_LEN8]] +// CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[ARRAYIDX9]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDECAY10]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[VLA12:%.*]] = alloca [[STRUCT_S]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY13:%.*]] = icmp eq ptr [[VLA12]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY13]], label [[OMP_ARRAYINIT_DONE18:%.*]], label [[OMP_ARRAYINIT_BODY14:%.*]] +// CHECK1: omp.arrayinit.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYINIT_BODY14]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYINIT_DONE18]], label [[OMP_ARRAYINIT_BODY14]] +// CHECK1: omp.arrayinit.done18: +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP43]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP44]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP41]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP45]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP43]], [[TMP44]] -// CHECK1-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP45]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP49]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP46]] -// 
CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP47]] to i64 -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[ARRAYIDX24]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX24]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[TMP50]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP51]] to i64 +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX20]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX21]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP52]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX21]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP53]], 1 +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP51]]) -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA7]], ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP55]], ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VLA15]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP28]] to ptr -// CHECK1-NEXT: store ptr [[TMP59]], ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP61]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP55]]) +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr 
[[VLA12]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP34]] to ptr +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP63]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP65]], [[TMP66]] -// CHECK1-NEXT: store i32 [[ADD27]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST23:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP66]], [[TMP67]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST23]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP64]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done30: -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq ptr [[ARRAYIDX9]], [[TMP67]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE38:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] -// CHECK1: omp.arraycpy.body32: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi ptr [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT35:%.*]], [[OMP_ARRAYCPY_BODY32]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT35]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT36]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE37:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT35]], [[TMP67]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE37]], label [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_BODY32]] -// CHECK1: omp.arraycpy.done38: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP61]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX6]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY28:%.*]] = icmp eq ptr [[ARRAYIDX6]], [[TMP68]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY28]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY29:%.*]] +// CHECK1: omp.arraycpy.body29: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi ptr [ 
[[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE27]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY29]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP68]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY29]] +// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP63]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY39:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP70]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY39]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY40:%.*]] -// CHECK1: omp.arraycpy.body40: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST41:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST42:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY40]] ] -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST41]], align 4 -// CHECK1-NEXT: 
[[TMP72:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 [[TMP71]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST42]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST41]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP70]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY40]] -// CHECK1: omp.arraycpy.done46: -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX9]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY47:%.*]] = icmp eq ptr [[ARRAYIDX9]], [[TMP73]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY47]], label [[OMP_ARRAYCPY_DONE55:%.*]], label [[OMP_ARRAYCPY_BODY48:%.*]] -// CHECK1: omp.arraycpy.body48: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST49:%.*]] = phi ptr [ [[VLA15]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT53:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST50:%.*]] = phi ptr [ [[ARRAYIDX9]], [[OMP_ARRAYCPY_DONE46]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT52:%.*]], [[OMP_ARRAYCPY_BODY48]] ] -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP75]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL51:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST50]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST49]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST50]], ptr align 4 [[CALL51]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 
[[TMP75]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT52]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST50]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT53]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST49]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE54:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT52]], [[TMP73]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE54]], label [[OMP_ARRAYCPY_DONE55]], label [[OMP_ARRAYCPY_BODY48]] -// CHECK1: omp.arraycpy.done55: +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY36:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP69]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY36]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY37:%.*]] +// CHECK1: omp.arraycpy.body37: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST38:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST39:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY37]] ] +// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST38]], align 4 +// CHECK1-NEXT: [[TMP71:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 [[TMP70]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST38]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP69]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY37]] +// CHECK1: omp.arraycpy.done43: +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX6]], i64 
[[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY44:%.*]] = icmp eq ptr [[ARRAYIDX6]], [[TMP72]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY44]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY45:%.*]] +// CHECK1: omp.arraycpy.body45: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST46:%.*]] = phi ptr [ [[VLA12]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST47:%.*]] = phi ptr [ [[ARRAYIDX6]], [[OMP_ARRAYCPY_DONE43]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY45]] ] +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP74]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL48:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST47]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST46]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST47]], ptr align 4 [[CALL48]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP74]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST47]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST46]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP72]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY45]] +// CHECK1: omp.arraycpy.done52: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: 
[[TMP78:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA15]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA15]], [[TMP78]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE56:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA12]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA12]], [[TMP75]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE53:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP78]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP75]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA15]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE56]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done56: -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP79]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA12]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE53]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done53: +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP76]]) // CHECK1-NEXT: ret void // // @@ -1235,61 +1297,58 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr 
[[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load 
ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP24]] +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP19]], i64 [[TMP23]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP19]], [[TMP27]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP14]], i64 [[TMP17]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP14]], [[TMP21]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY3]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY4:%.*]] // CHECK1: omp.arraycpy.body4: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP19]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP14]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY4]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST5]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP27]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP21]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY4]] // CHECK1: omp.arraycpy.done10: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(160) [[ARRS:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1298,196 +1357,198 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// 
CHECK1-NEXT: [[ARRS4:%.*]] = alloca [10 x [4 x %struct.S]], align 16 +// CHECK1-NEXT: [[ARRS:%.*]] = alloca [10 x [4 x %struct.S]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 
4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = 
icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY5:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY5]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY6:%.*]] -// CHECK1: omp.arrayinit.body6: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY6]] ] -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label [[OMP_ARRAYINIT_BODY6]] -// CHECK1: omp.arrayinit.done10: -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY1:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY1]], label [[OMP_ARRAYINIT_DONE6:%.*]], label [[OMP_ARRAYINIT_BODY2:%.*]] +// CHECK1: omp.arrayinit.body2: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY2]] ] +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST3]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYINIT_DONE6]], label [[OMP_ARRAYINIT_BODY2]] +// CHECK1: omp.arrayinit.done6: +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA3]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX12]], align 4 -// 
CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA3]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr 
[[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[ARRS4]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP31]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP34]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label 
[[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP6]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add 
nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP3]], [[TMP37]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARRS4]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST22]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST21]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST22]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP37]] -// 
CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP8]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE14]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP39]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label 
[[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY27:%.*]] = icmp eq ptr [[TMP2]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY27]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY28:%.*]] -// CHECK1: omp.arraycpy.body28: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST29:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST30:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY28]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST29]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 [[TMP41]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST29]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY28]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq ptr [[TMP3]], [[TMP43]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE43:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi ptr [ [[ARRS4]], 
[[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT40:%.*]], [[OMP_ARRAYCPY_BODY36]] ] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[TMP6]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE30:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] +// CHECK1: omp.arraycpy.body24: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT28:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT27:%.*]], [[OMP_ARRAYCPY_BODY24]] ] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 [[TMP41]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT27]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT28]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE29:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT27]], [[TMP40]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE29]], label [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_BODY24]] +// CHECK1: omp.arraycpy.done30: +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY31:%.*]] = icmp eq ptr [[TMP8]], [[TMP43]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY31]], label [[OMP_ARRAYCPY_DONE39:%.*]], label [[OMP_ARRAYCPY_BODY32:%.*]] +// CHECK1: omp.arraycpy.body32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST33:%.*]] = phi ptr [ [[ARRS]], 
[[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY32]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST34:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE30]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT36:%.*]], [[OMP_ARRAYCPY_BODY32]] ] // CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 // CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP45]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL39:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST38]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST37]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST38]], ptr align 4 [[CALL39]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL35:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST33]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST34]], ptr align 4 [[CALL35]], i64 4, i1 false) // CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP45]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT40]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT41]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE42:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT40]], [[TMP43]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE42]], label [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done43: -// CHECK1-NEXT: call void 
@__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT36]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT37]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST33]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE38:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT36]], [[TMP43]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE38]], label [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_BODY32]] +// CHECK1: omp.arraycpy.done39: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN44:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS4]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN44]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN40:%.*]] = getelementptr inbounds [10 x [4 x %struct.S]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN40]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP48]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP46]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN44]] -// 
CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE45:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done45: -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP49]]) -// CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[TMP50]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP51]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN40]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE41:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done41: +// CHECK1-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP49]]) // CHECK1-NEXT: ret void // // @@ -1499,196 +1560,196 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] 
= load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP15]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP19]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP19]], [[TMP24]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP14]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP14]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY3]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY4:%.*]] // CHECK1: omp.arraycpy.body4: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP19]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP14]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY4]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST5]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP24]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY4]] // CHECK1: omp.arraycpy.done10: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR6:%.*]] = alloca [1 x [2 x i32]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [1 x [2 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP4]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX4]], i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR6]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP7]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX2]], i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr [[ARR]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: 
omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[ARR6]], i64 [[TMP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[ARR]], i64 [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX8]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX6]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 +// 
CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR6]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP29]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP31]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label 
[[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP32]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP33]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[ARR6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 [[TMP36]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP29]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP37]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: 
omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP31]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP39]]) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP40]]) // CHECK1-NEXT: ret void // // @@ -1700,35 +1761,35 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], 
label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 
dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1743,106 +1804,108 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: 
[[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr 
getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP23:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP9]] to ptr -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP36]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store 
ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP37]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP9]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP11]] // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: @@ -1855,44 +1918,44 @@ // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP42]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr 
[[ARRAYIDX1]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] // CHECK1: omp.arraycpy.body13: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP42]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] // CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP47]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP43]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr 
[[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP46]]) // CHECK1-NEXT: ret void // // @@ -1904,185 +1967,187 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], 
[[TMP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br 
i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP11]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP13]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 
[[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: 
call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// 
CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr align 4 [[CALL17]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] 
= call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP36]]) // CHECK1-NEXT: ret void // // @@ -2094,182 +2159,184 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [1 x [6 x %struct.S]], align 16 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// 
CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP11]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP13]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 
[ 9, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// 
CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef 
nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP30]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done11: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] -// CHECK1: omp.arraycpy.body14: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VAR24]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST15]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST16]], ptr align 4 [[CALL17]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY14]] -// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX1]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] +// CHECK1: omp.arraycpy.body13: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr 
@[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP31]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] +// CHECK1: omp.arraycpy.done20: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN22:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR24]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN22]], i64 6 +// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [1 x [6 x %struct.S]], ptr [[VAR2]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 6 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// 
CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN22]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE23:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done23: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done22: +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP36]]) // CHECK1-NEXT: ret void // // @@ -2281,133 +2348,135 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 6 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 6 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR24:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR2:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR24]], i64 [[TMP10]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8 -// 
CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TMP6]], i64 1 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR2]], i64 [[TMP12]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: 
omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR24]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP13]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP15]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr 
@.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL9]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) 
+// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[CALL8]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR24]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP13]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR2]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP15]]) // CHECK1-NEXT: ret void // // @@ -2419,159 +2488,161 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// 
CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[VVAR2:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP0]], i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ 
[[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], ptr [[VVAR22]], i64 [[TMP6]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S]], ptr [[VVAR2]], i64 [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: 
omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VVAR22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP24]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP26]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP22]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// 
CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST5]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST5]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST4:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST4]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST4]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE7:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP27]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done8: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] +// CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done7: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY9:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY9]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY10:%.*]] -// CHECK1: omp.arraycpy.body10: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY10]] ] -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST12]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST11]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST12]], ptr align 4 [[CALL13]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT15]] = getelementptr [[STRUCT_S]], 
ptr [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY10]] -// CHECK1: omp.arraycpy.done17: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP24]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] +// CHECK1: omp.arraycpy.body9: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY9]] ] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST11]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST10]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST11]], ptr align 4 [[CALL12]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], 
i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT14]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY9]] +// CHECK1: omp.arraycpy.done16: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN18:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN18]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN18]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE19:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done19: -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = 
load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP37]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done18: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -2583,33 +2654,33 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2618,145 +2689,147 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR34]], i64 [[TMP10]] -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 
// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr 
@_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: 
omp.arraycpy.done10: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = 
icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2768,33 +2841,33 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], 
[ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2803,145 +2876,147 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR34]], i64 [[TMP10]] -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST8]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done11: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done10: +// CHECK1-NEXT: call void 
@__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY12:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY12]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY13:%.*]] -// CHECK1: omp.arraycpy.body13: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY13]] ] -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr align 4 [[CALL16]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], 
[[TMP34]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY13]] -// CHECK1: omp.arraycpy.done20: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] +// CHECK1: omp.arraycpy.body12: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] +// CHECK1: omp.arraycpy.done19: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN21:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN21]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN21]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE22:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP41]]) +// CHECK1-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done21: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2953,33 +3028,33 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2995,46 +3070,48 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP3]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr 
[[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 -// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP8]], align 16 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[TMP5]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr ([[STRUCT_S:%.*]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = add nuw i64 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S]], i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[__VLA_EXPR0]], align 8 +// 
CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S]], ptr [[VLA]], i64 [[TMP17]] -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr 
[[STRUCT_S]], ptr [[VLA]], i64 [[TMP18]] +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -3080,19 +3157,19 @@ // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP36]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP10]] to ptr +// CHECK1-NEXT: store ptr [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) 
+// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP39]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -3101,47 +3178,47 @@ // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP39]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP37]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: 
[[TMP42:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP8]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP42]] +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] // CHECK1: omp.arraycpy.body12: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr 
@.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP42]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP38]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] // CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP47]] +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[VLA]], i64 [[TMP10]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP41]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done20: -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP42]]) +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP44]]) // CHECK1-NEXT: ret void // // @@ -3153,36 +3230,36 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 
0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[VAR3:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3191,136 +3268,138 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR33:%.*]] = alloca [4 x %struct.S], align 16 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [4 x %struct.S], align 16 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ 
[[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: store ptr [[VAR33]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR33]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP18]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.24, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP19]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.24, ptr 
@.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP21]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR33]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], 
[ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST6]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP21]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done10: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE8:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT7]], [[TMP21]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done9: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY11:%.*]] = icmp eq ptr [[TMP2]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY11]], label [[OMP_ARRAYCPY_DONE19:%.*]], label [[OMP_ARRAYCPY_BODY12:%.*]] -// CHECK1: omp.arraycpy.body12: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST13:%.*]] = phi ptr [ [[VAR33]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY12]] ] -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST14]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT17]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE18:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_BODY12]] -// CHECK1: omp.arraycpy.done19: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY10:%.*]] = icmp eq ptr [[TMP4]], [[TMP22]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY10]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY11:%.*]] +// CHECK1: omp.arraycpy.body11: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi ptr [ [[VAR3]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY11]] ] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST13]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST12]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST13]], ptr align 4 [[CALL14]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP24]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP22]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY11]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN20:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR33]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN20]], i64 4 +// CHECK1-NEXT: 
[[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN19]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN20]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done21: -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP31]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN19]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE20:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done20: +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP27]]) // CHECK1-NEXT: ret void // // @@ -3332,22 +3411,22 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], 
align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 
dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -3358,64 +3437,97 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.12], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.0], align 16 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S.12], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiLi42EET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN]], i64 42 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds 
[[STRUCT_S_12]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..25, ptr [[T_VAR]], ptr [[TMP1]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..29, ptr [[ARR]], ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[TMP3]]) +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..25, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi ptr [ [[TMP5]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi ptr [ [[TMP19]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 -// 
CHECK1-NEXT: ret i32 [[TMP6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP20]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -3482,16 +3594,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..25 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3500,169 +3607,171 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, 
align 8 -// CHECK1-NEXT: [[VAR16:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR17:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP22:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12]], align 4 +// CHECK1-NEXT: [[REF_TMP18:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP5]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP12]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP25]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR16]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR17]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP9]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.26, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL13]], 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 // 
CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP37:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL15]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP37]] to i32 +// CHECK1-NEXT: [[TMP35:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL11]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP35]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[CMP16:%.*]] = icmp slt i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK1: cond.true17: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: br label [[COND_END19:%.*]] -// CHECK1: cond.false18: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: br label [[COND_END19]] -// CHECK1: cond.end19: -// CHECK1-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] -// CHECK1-NEXT: store i32 [[COND20]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE13:%.*]], label 
[[COND_FALSE14:%.*]] +// CHECK1: cond.true13: +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: br label [[COND_END15:%.*]] +// CHECK1: cond.false14: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END15]] +// CHECK1: cond.end15: +// CHECK1-NEXT: [[COND16:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE13]] ], [ [[TMP39]], [[COND_FALSE14]] ] +// CHECK1-NEXT: store i32 [[COND16]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP44]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL21]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL23:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[CALL23]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL24]], label [[LAND_RHS25:%.*]], label [[LAND_END28:%.*]] -// CHECK1: land.rhs25: -// CHECK1-NEXT: [[CALL26:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) -// CHECK1-NEXT: 
[[TOBOOL27:%.*]] = icmp ne i32 [[CALL26]], 0 -// CHECK1-NEXT: br label [[LAND_END28]] -// CHECK1: land.end28: -// CHECK1-NEXT: [[TMP48:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL27]], [[LAND_RHS25]] ] -// CHECK1-NEXT: [[CONV29:%.*]] = zext i1 [[TMP48]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]], i32 noundef [[CONV29]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP22]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[T_VAR17]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = atomicrmw min ptr [[TMP3]], i32 [[TMP51]] monotonic, align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP40]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL17:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[CALL17]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL19:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL20:%.*]] = icmp ne i32 [[CALL19]], 0 +// CHECK1-NEXT: br i1 
[[TOBOOL20]], label [[LAND_RHS21:%.*]], label [[LAND_END24:%.*]] +// CHECK1: land.rhs21: +// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[CALL22]], 0 +// CHECK1-NEXT: br label [[LAND_END24]] +// CHECK1: land.end24: +// CHECK1-NEXT: [[TMP42:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL23]], [[LAND_RHS21]] ] +// CHECK1-NEXT: [[CONV25:%.*]] = zext i1 [[TMP42]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]], i32 noundef [[CONV25]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP18]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP18]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw min ptr [[TMP8]], i32 [[TMP43]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR16]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -3671,59 +3780,59 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], 
align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: 
[[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: 
[[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP34]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP22]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP22]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load 
i32, ptr [[TMP19]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP19]], align 4 // CHECK1-NEXT: ret void // // @@ -3758,14 +3867,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3774,128 +3880,130 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 1, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 1, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.28, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP11]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.28, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL7:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP26:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL8]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP26]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr 
[[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP27:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP27]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP28:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP34:%.*]], [[LAND_END14:%.*]] ] -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[_TMP10]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[_TMP10]], align 4 -// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL11]], label [[LAND_RHS12:%.*]], label [[LAND_END14]] -// CHECK1: land.rhs12: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK1-NEXT: br label [[LAND_END14]] -// CHECK1: land.end14: -// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL13]], [[LAND_RHS12]] ] -// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP31]] to i32 -// CHECK1-NEXT: store i32 [[CONV15]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: 
[[TMP33:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP28]], i32 [[TMP32]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP34]] = extractvalue { i32, i1 } [[TMP33]], 0 -// CHECK1-NEXT: [[TMP35:%.*]] = extractvalue { i32, i1 } [[TMP33]], 1 -// CHECK1-NEXT: br i1 [[TMP35]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP29:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP35:%.*]], [[LAND_END13:%.*]] ] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[_TMP9]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[_TMP9]], align 4 +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END13]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TOBOOL12:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[ATOMIC_CONT]] ], [ [[TOBOOL12]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = zext i1 [[TMP32]] to i32 +// CHECK1-NEXT: store i32 [[CONV14]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP29]], i32 [[TMP33]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP35]] = extractvalue { i32, i1 } [[TMP34]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = extractvalue { i32, i1 } [[TMP34]], 1 +// CHECK1-NEXT: br i1 [[TMP36]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP11]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP6]]) +// 
CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP11]]) // CHECK1-NEXT: ret void // // @@ -3907,35 +4015,31 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP14:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL2]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP14]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL2]], [[LAND_RHS]] ] +// CHECK1-NEXT: 
[[CONV:%.*]] = zext i1 [[TMP10]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(168) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -3944,168 +4048,170 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S.0], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S.12], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP20:%.*]] = alloca 
[[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP19:%.*]] = alloca [[STRUCT_S_12]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// 
CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.0], ptr [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S.12], ptr [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARR4]], i64 [[TMP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr ([[STRUCT_S_12]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARR]], i64 [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: 
omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP26]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP3]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX5]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.12], ptr [[TMP8]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], 
ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR4]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP37]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: 
[[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP35]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST10]]) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD12]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST10]], ptr align 4 
[[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST9]]) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CALL]], [[CALL10]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD11]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST9]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP38]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label 
[[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done14: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP41]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] -// CHECK1: omp.arraycpy.body17: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY17]] ] -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST19]]) -// CHECK1-NEXT: [[CALL22:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[CALL21]], [[CALL22]] -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]], i32 noundef [[ADD23]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST19]], ptr align 4 [[REF_TMP20]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: 
call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP41]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY17]] -// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] +// CHECK1: omp.arraycpy.body16: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL20:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST18]]) +// CHECK1-NEXT: [[CALL21:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST17]]) +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CALL20]], [[CALL21]] +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 
dereferenceable(4) [[REF_TMP19]], i32 noundef [[ADD22]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST18]], ptr align 4 [[REF_TMP19]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY16]] +// CHECK1: omp.arraycpy.done26: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S.0], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN28]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [40 x %struct.S.12], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAY_BEGIN27]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP46]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_12]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done29: -// CHECK1-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP48]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done28: +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -4114,30 +4220,30 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_12:%.*]], align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP10]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_12]], ptr [[TMP7]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]]) // CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL2]] // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 
dereferenceable(4) [[REF_TMP]], i32 noundef [[ADD]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_12]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void @@ -4149,7 +4255,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile double, ptr @g, align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[F]], align 4 @@ -4164,7 +4270,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr 
inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_12:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to double // CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 @@ -4194,10 +4300,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4213,93 +4320,95 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: store 
double 0.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr @g1, align 8 // CHECK3-NEXT: store double 0.000000e+00, ptr [[G1]], align 8 // CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK3-NEXT: store ptr [[G1]], ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr 
@[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr @g, align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr @g, align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[TMP19]], [[TMP20]] // CHECK3-NEXT: store double [[ADD5]], ptr @g, align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G1]], align 8 -// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store double [[ADD6]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G1]], align 8 +// CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store double [[ADD6]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr @g, double [[TMP25]] monotonic, align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[G1]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw fadd ptr [[TMP1]], double [[TMP27]] 
monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw fadd ptr @g, double [[TMP23]] monotonic, align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[G1]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw fadd ptr [[TMP2]], double [[TMP25]] monotonic, align 8 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -4311,23 +4420,23 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[TMP2]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store double [[ADD]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store double [[ADD]], ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP11]], align 8 // CHECK3-NEXT: ret void // // @@ -4346,17 +4455,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4372,46 +4483,48 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr @g1, align 8 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr @g1, align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load 
ptr, ptr @g1, align 8 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G1]], align 8 // CHECK4-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK4-NEXT: store double 1.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -4423,55 +4536,55 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 5 -// 
CHECK4-NEXT: [[TMP11:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP11]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 8 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK4-NEXT: call void [[TMP16]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G]], ptr [[TMP19]], align 8 -// CHECK4-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP25:%.*]] = load double, ptr @g, align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP25]], [[TMP26]] +// CHECK4-NEXT: [[TMP20:%.*]] = load double, ptr @g, align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP20]], [[TMP21]] // CHECK4-NEXT: store double [[ADD6]], ptr @g, align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load double, ptr [[G1]], align 8 -// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP27]], [[TMP28]] -// 
CHECK4-NEXT: store double [[ADD7]], ptr [[TMP1]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load double, ptr [[G1]], align 8 +// CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store double [[ADD7]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw fadd ptr @g, double [[TMP29]] monotonic, align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load double, ptr [[G1]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr [[TMP1]], double [[TMP31]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP3]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP24:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = atomicrmw fadd ptr @g, double [[TMP24]] monotonic, align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load double, ptr [[G1]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr [[TMP2]], double [[TMP26]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -4498,22 +4611,22 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load double, ptr [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP18]], [[TMP19]] -// CHECK4-NEXT: store double [[ADD]], ptr [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[ADD2:%.*]] = fadd double [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store double [[ADD2]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = load 
double, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK4-NEXT: store double [[ADD]], ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[ADD2:%.*]] = fadd double [[TMP14]], [[TMP15]] +// CHECK4-NEXT: store double [[ADD2]], ptr [[TMP11]], align 8 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -479,8 +479,11 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[S]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[S]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // @@ -496,45 +499,47 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[S:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S1:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @.omp_initializer.(ptr noundef [[S1]], ptr noundef [[TMP0]]) +// CHECK1-NEXT: call void @.omp_initializer.(ptr noundef [[S]], ptr noundef [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// CHECK1-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 -// CHECK1-NEXT: call void @.omp_combiner.(ptr noundef [[TMP0]], ptr noundef [[S1]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @.omp_combiner.(ptr noundef [[TMP2]], ptr noundef [[S]]) +// CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[S]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -584,10 +589,10 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr @g, align 8 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP1]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -616,8 +621,15 @@ // CHECK1-NEXT: [[VAR2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[VAR3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_10:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_12:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_13:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_14:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_15:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) 
[[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 @@ -655,62 +667,100 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK1: arrayctor.cont9: // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[VAR3]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined..1, ptr [[T_VAR]], ptr [[TMP1]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 10, [[TMP3]] -// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP5]], align 16 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..9, i64 10, i64 [[TMP3]], ptr [[VLA]], ptr [[VEC]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i64 10, i64 [[TMP3]], ptr [[VLA]], ptr [[ARRS]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..15, ptr [[VAR2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[VVAR2]]) -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..19, ptr [[TMP6]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..21, ptr [[TMP7]]) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() -// CHECK1-NEXT: store i32 [[CALL10]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP8]]) -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 5 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 10, [[TMP8]] +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP10]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK1-NEXT: store i64 10, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_12]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_12]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_13]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_13]]) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_14]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_14]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_15]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..21, ptr [[OMP_OUTLINED_ARG_AGG_15]]) +// CHECK1-NEXT: [[CALL16:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() +// CHECK1-NEXT: store i32 [[CALL16]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP26]]) +// CHECK1-NEXT: [[ARRAY_BEGIN17:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN17]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[ARRAYCTOR_CONT9]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 40 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY14:%.*]] -// CHECK1: arraydestroy.body14: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST15:%.*]] = phi ptr [ [[TMP10]], [[ARRAYDESTROY_DONE12]] ], [ [[ARRAYDESTROY_ELEMENT16:%.*]], [[ARRAYDESTROY_BODY14]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT16]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST15]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT16]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE17:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT16]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE17]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY14]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN17]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE18:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done18: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN19]], i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN19:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN19]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY20:%.*]] // CHECK1: arraydestroy.body20: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP11]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], [[ARRAYDESTROY_BODY20]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST21:%.*]] = phi ptr [ [[TMP28]], [[ARRAYDESTROY_DONE18]] ], [ [[ARRAYDESTROY_ELEMENT22:%.*]], 
[[ARRAYDESTROY_BODY20]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT22]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST21]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT22]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE23:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT22]], [[ARRAY_BEGIN19]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE23]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY20]] // CHECK1: arraydestroy.done24: +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY26:%.*]] +// CHECK1: arraydestroy.body26: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST27:%.*]] = phi ptr [ [[TMP29]], [[ARRAYDESTROY_DONE24]] ], [ [[ARRAYDESTROY_ELEMENT28:%.*]], [[ARRAYDESTROY_BODY26]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT28]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST27]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT28]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE29:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT28]], [[ARRAY_BEGIN25]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE29]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY26]] +// CHECK1: arraydestroy.done30: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP12]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP30]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -737,16 +787,11 @@ // // 
// CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -755,138 +800,140 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @.omp_initializer..3(ptr noundef [[T_VAR3]], ptr noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..7(ptr noundef [[VAR17]], ptr noundef [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load float, ptr @.init, align 4 -// CHECK1-NEXT: store float [[TMP12]], ptr [[T_VAR18]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 +// CHECK1-NEXT: call void @.omp_initializer..3(ptr noundef [[T_VAR]], ptr noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..7(ptr noundef [[VAR1]], ptr noundef [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr @.init, align 4 +// CHECK1-NEXT: store float [[TMP15]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP21]] to i32 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32 +// CHECK1-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP23]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP26]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR17]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR18]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP17]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr 
@.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP33]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) -// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) +// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr 
@.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR15]], ptr noundef [[ADD_PTR16]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// 
CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP17]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP17]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP14]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -985,29 +1032,29 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP10]], ptr noundef [[TMP7]]) -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 4 -// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..2(ptr noundef [[TMP7]], ptr noundef [[TMP5]]) +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 4 +// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) -// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP22]], ptr noundef [[TMP19]]) -// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP28]], ptr noundef [[TMP25]]) +// CHECK1-NEXT: call void @.omp_combiner..6(ptr noundef [[TMP15]], ptr noundef [[TMP13]]) +// CHECK1-NEXT: call void @.omp_combiner..8(ptr noundef [[TMP19]], ptr noundef [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -1037,15 +1084,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1059,94 +1102,96 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] -// CHECK1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX6]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: 
[[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA7:%.*]] = alloca i32, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA7]], [[TMP16]] +// CHECK1-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 0 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX3]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = sub i64 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = sdiv exact i64 [[TMP17]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP19:%.*]] = add nuw i64 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = mul nuw i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP19]], align 16 +// 
CHECK1-NEXT: store i64 [[TMP19]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: call void @.omp_initializer..11(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP22]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// 
CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[VLA7]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX9]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN12:%.*]] = add nsw i64 0, [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP4]], i64 0, i64 [[LB_ADD_LEN12]] -// CHECK1-NEXT: [[ARRAYDECAY14:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX13]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY14]], i64 2 -// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX15]] to i64 -// CHECK1-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP26:%.*]] = sub i64 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = sdiv exact i64 [[TMP26]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = mul nuw i64 [[TMP28]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[VLA16:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP28]], align 16 -// CHECK1-NEXT: store i64 [[TMP28]], ptr [[__VLA_EXPR1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY17:%.*]] = icmp eq ptr [[VLA16]], 
[[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY17]], label [[OMP_ARRAYINIT_DONE25:%.*]], label [[OMP_ARRAYINIT_BODY18:%.*]] -// CHECK1: omp.arrayinit.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYINIT_BODY18]] ] -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR21]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP30]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYINIT_DONE25]], label [[OMP_ARRAYINIT_BODY18]] -// CHECK1: omp.arrayinit.done25: -// CHECK1-NEXT: [[TMP36:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 -// CHECK1-NEXT: [[TMP38:%.*]] = sub i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: [[TMP39:%.*]] = sdiv exact i64 [[TMP38]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP39]] +// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP25:%.*]] = sub i64 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = sdiv exact i64 [[TMP25]], 
ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP26]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX6]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYDECAY]], i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP8]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 0, [[TMP29]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[TMP10]], i64 0, i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[ARRAYDECAY11:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[ARRAYIDX10]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDECAY11]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 +// CHECK1-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP32:%.*]] = sub i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = sdiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP34:%.*]] = add nuw i64 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP35:%.*]] = mul nuw i64 [[TMP34]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[VLA13:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP34]], align 16 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY14:%.*]] = icmp eq ptr [[VLA13]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY14]], 
label [[OMP_ARRAYINIT_DONE22:%.*]], label [[OMP_ARRAYINIT_BODY15:%.*]] +// CHECK1: omp.arrayinit.body15: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYINIT_BODY15]] ] +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i64 4 +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR18]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYINIT_DONE22]], label [[OMP_ARRAYINIT_BODY15]] +// CHECK1: omp.arrayinit.done22: +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = sub i64 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = sdiv exact i64 [[TMP39]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP40]] // CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP43]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) @@ -1167,8 +1212,8 @@ // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP26:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] -// CHECK1-NEXT: br i1 [[CMP26]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP47]], [[TMP48]] +// CHECK1-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -1176,21 +1221,21 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP49]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP50]] +// CHECK1-NEXT: [[TMP50:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[TMP50]] // CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP51]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX27]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX28]], align 4 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX24]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[ARRAYIDX25]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX28]], align 4 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX25]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: 
[[TMP53:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP53]], 1 -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP53]], 1 +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1199,100 +1244,100 @@ // CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP55]]) // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA7]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP59:%.*]] = inttoptr i64 [[TMP13]] to ptr -// CHECK1-NEXT: store ptr [[TMP59]], ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VLA16]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP63:%.*]] = inttoptr i64 [[TMP28]] to ptr -// CHECK1-NEXT: store ptr [[TMP63]], ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[TMP64]], align 4 -// CHECK1-NEXT: [[TMP67:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP65]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP67]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [4 x 
ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP19]] to ptr +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VLA13]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP61:%.*]] = inttoptr i64 [[TMP34]] to ptr +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP63]], i32 2, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP68]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST30:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST31:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST31]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST30]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST31]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST30]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP68]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done34: -// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY35:%.*]] = icmp eq ptr [[ARRAYIDX10]], [[TMP69]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY35]], label [[OMP_ARRAYCPY_DONE44:%.*]], label [[OMP_ARRAYCPY_BODY36:%.*]] -// CHECK1: omp.arraycpy.body36: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST37:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT42:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST38:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE34]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT41:%.*]], [[OMP_ARRAYCPY_BODY36]] ] -// CHECK1-NEXT: [[ADD_PTR39:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i64 4 -// CHECK1-NEXT: [[ADD_PTR40:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST37]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR39]], ptr noundef [[ADD_PTR40]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT41]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST38]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT42]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST37]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE43:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT41]], [[TMP69]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE43]], label [[OMP_ARRAYCPY_DONE44]], label [[OMP_ARRAYCPY_BODY36]] -// CHECK1: omp.arraycpy.done44: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP65]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST27:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST28:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST28]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST27]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST28]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST27]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP65]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY32:%.*]] = icmp eq ptr [[ARRAYIDX7]], [[TMP66]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY32]], label [[OMP_ARRAYCPY_DONE41:%.*]], label [[OMP_ARRAYCPY_BODY33:%.*]] +// CHECK1: omp.arraycpy.body33: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST34:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYCPY_DONE31]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT39:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST35:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE31]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY33]] ] +// CHECK1-NEXT: [[ADD_PTR36:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST35]], i64 4 +// CHECK1-NEXT: [[ADD_PTR37:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST34]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR36]], ptr noundef [[ADD_PTR37]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT38]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST35]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT39]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST34]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE40:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT38]], [[TMP66]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_DONE41]], label [[OMP_ARRAYCPY_BODY33]] +// CHECK1: omp.arraycpy.done41: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP63]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr i32, ptr [[ARRAYIDX3]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY45:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP74]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY45]], label [[OMP_ARRAYCPY_DONE52:%.*]], label [[OMP_ARRAYCPY_BODY46:%.*]] -// CHECK1: omp.arraycpy.body46: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST47:%.*]] = phi ptr [ [[VLA7]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT50:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST48:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT49:%.*]], [[OMP_ARRAYCPY_BODY46]] ] -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP76]], ptr 
@.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST48]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST47]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP76]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT49]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST48]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT50]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST47]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE51:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT49]], [[TMP74]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE51]], label [[OMP_ARRAYCPY_DONE52]], label [[OMP_ARRAYCPY_BODY46]] -// CHECK1: omp.arraycpy.done52: -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX10]], i64 [[TMP28]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY53:%.*]] = icmp eq ptr [[ARRAYIDX10]], [[TMP77]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY53]], label [[OMP_ARRAYCPY_DONE62:%.*]], label [[OMP_ARRAYCPY_BODY54:%.*]] -// CHECK1: omp.arraycpy.body54: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST55:%.*]] = phi ptr [ [[VLA16]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT60:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST56:%.*]] = phi ptr [ [[ARRAYIDX10]], [[OMP_ARRAYCPY_DONE52]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT59:%.*]], [[OMP_ARRAYCPY_BODY54]] ] -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP79]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR57:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST56]], i64 4 -// CHECK1-NEXT: [[ADD_PTR58:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST55]], i64 4 -// CHECK1-NEXT: call void 
@.omp_combiner..4(ptr noundef [[ADD_PTR57]], ptr noundef [[ADD_PTR58]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP79]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT59]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST56]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT60]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST55]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE61:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT59]], [[TMP77]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE61]], label [[OMP_ARRAYCPY_DONE62]], label [[OMP_ARRAYCPY_BODY54]] -// CHECK1: omp.arraycpy.done62: +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr i32, ptr [[ARRAYIDX1]], i64 [[TMP19]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY42:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP67]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY42]], label [[OMP_ARRAYCPY_DONE49:%.*]], label [[OMP_ARRAYCPY_BODY43:%.*]] +// CHECK1: omp.arraycpy.body43: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST44:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST45:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT46:%.*]], [[OMP_ARRAYCPY_BODY43]] ] +// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP69]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST45]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST44]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP69]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT46]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST45]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT47]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST44]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE48:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT46]], [[TMP67]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE48]], label [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_BODY43]] +// CHECK1: omp.arraycpy.done49: +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX7]], i64 [[TMP34]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY50:%.*]] = icmp eq ptr [[ARRAYIDX7]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY50]], label [[OMP_ARRAYCPY_DONE59:%.*]], label [[OMP_ARRAYCPY_BODY51:%.*]] +// CHECK1: omp.arraycpy.body51: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST52:%.*]] = phi ptr [ [[VLA13]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT57:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST53:%.*]] = phi ptr [ [[ARRAYIDX7]], [[OMP_ARRAYCPY_DONE49]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT56:%.*]], [[OMP_ARRAYCPY_BODY51]] ] +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP72]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST53]], i64 4 +// CHECK1-NEXT: [[ADD_PTR55:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST52]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR54]], ptr noundef [[ADD_PTR55]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP72]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT56]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST53]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT57]] = getelementptr 
[[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST52]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE58:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT56]], [[TMP70]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE58]], label [[OMP_ARRAYCPY_DONE59]], label [[OMP_ARRAYCPY_BODY51]] +// CHECK1: omp.arraycpy.done59: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA16]], i64 [[TMP28]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA16]], [[TMP84]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE63:%.*]], label [[ARRAYDESTROY_BODY:%.*]] +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA13]], i64 [[TMP34]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA13]], [[TMP73]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE60:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP84]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP73]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA16]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE63]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done63: -// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP85]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA13]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE60]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done60: +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP74]]) // CHECK1-NEXT: ret void // // @@ -1332,59 +1377,56 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP24]] +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP19]], i64 [[TMP23]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP19]], [[TMP25]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP14]], i64 [[TMP17]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP14]], [[TMP19]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY3]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY4:%.*]] // CHECK1: omp.arraycpy.body4: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY4]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP19]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP14]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i64 4 // CHECK1-NEXT: [[ADD_PTR7:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR7]]) // CHECK1-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT9]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP25]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP19]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY4]] // CHECK1: omp.arraycpy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(480) [[ARRS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARRS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1393,57 +1435,59 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARRS5:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 +// CHECK1-NEXT: 
[[ARRS:%.*]] = alloca [10 x [4 x %struct.S.0]], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARRS]], ptr [[ARRS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP0]], [[TMP1]] -// CHECK1-NEXT: 
[[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -// CHECK1-NEXT: [[TMP6:%.*]] = udiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA3:%.*]] = alloca i32, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[VLA3]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA3]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +// CHECK1-NEXT: [[TMP11:%.*]] = udiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT1:%.*]], [[OMP_ARRAYINIT_BODY]] ] // 
CHECK1-NEXT: call void @.omp_initializer..11(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT1]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT1]], [[TMP13]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY6:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY6]], label [[OMP_ARRAYINIT_DONE14:%.*]], label [[OMP_ARRAYINIT_BODY7:%.*]] -// CHECK1: omp.arrayinit.body7: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYINIT_BODY7]] ] -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i64 4 -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR10]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr 
[[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP10]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYINIT_DONE14]], label [[OMP_ARRAYINIT_BODY7]] -// CHECK1: omp.arrayinit.done14: +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY2:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY2]], label [[OMP_ARRAYINIT_DONE10:%.*]], label [[OMP_ARRAYINIT_BODY3:%.*]] +// CHECK1: omp.arrayinit.body3: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST4:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT7:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST5:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_ARRAYINIT_DONE]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYINIT_BODY3]] ] +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i64 4 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST4]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR6]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT7]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST4]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST5]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE9:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYINIT_DONE10]], label 
[[OMP_ARRAYINIT_BODY3]] +// CHECK1: omp.arrayinit.done10: // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) @@ -1464,8 +1508,8 @@ // CHECK1: omp.inner.for.cond: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP15:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: br i1 [[CMP15]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: @@ -1473,21 +1517,21 @@ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA3]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] // CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX12]], align 
4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX16]], align 4 +// CHECK1-NEXT: store i32 [[INC]], ptr [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: // CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1496,101 +1540,101 @@ // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA3]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[ARRS5]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP36]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// 
CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[ARRS]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP34]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP6]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST19]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST18]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP39]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[TMP3]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[ARRS5]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE22]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[OMP_ARRAYCPY_BODY24]] ] -// CHECK1-NEXT: [[ADD_PTR27:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i64 4 -// CHECK1-NEXT: [[ADD_PTR28:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR27]], ptr noundef [[ADD_PTR28]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP40]] -// CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST14:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST15]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST14]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST14]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP36]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done18: +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[TMP8]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] +// CHECK1: omp.arraycpy.body20: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE18]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] +// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i64 4 +// CHECK1-NEXT: [[ADD_PTR24:%.*]] = 
getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR23]], ptr noundef [[ADD_PTR24]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] +// CHECK1: omp.arraycpy.done28: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr i32, ptr [[TMP2]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY33:%.*]] = icmp eq ptr [[TMP2]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY33]], label [[OMP_ARRAYCPY_DONE40:%.*]], label [[OMP_ARRAYCPY_BODY34:%.*]] -// CHECK1: omp.arraycpy.body34: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST35:%.*]] = phi ptr [ [[VLA3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT38:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST36:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT37:%.*]], [[OMP_ARRAYCPY_BODY34]] ] -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST36]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST35]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr 
@[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT37]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST36]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT38]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST35]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE39:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT37]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE39]], label [[OMP_ARRAYCPY_DONE40]], label [[OMP_ARRAYCPY_BODY34]] -// CHECK1: omp.arraycpy.done40: -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP3]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY41:%.*]] = icmp eq ptr [[TMP3]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY41]], label [[OMP_ARRAYCPY_DONE50:%.*]], label [[OMP_ARRAYCPY_BODY42:%.*]] -// CHECK1: omp.arraycpy.body42: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST43:%.*]] = phi ptr [ [[ARRS5]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT48:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST44:%.*]] = phi ptr [ [[TMP3]], [[OMP_ARRAYCPY_DONE40]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT47:%.*]], [[OMP_ARRAYCPY_BODY42]] ] -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP50]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR45:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i64 4 -// CHECK1-NEXT: [[ADD_PTR46:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR45]], ptr noundef [[ADD_PTR46]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP50]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT47]] = getelementptr 
[[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST44]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT48]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST43]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE49:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT47]], [[TMP48]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE49]], label [[OMP_ARRAYCPY_DONE50]], label [[OMP_ARRAYCPY_BODY42]] -// CHECK1: omp.arraycpy.done50: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr i32, ptr [[TMP6]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY29:%.*]] = icmp eq ptr [[TMP6]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY29]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY30:%.*]] +// CHECK1: omp.arraycpy.body30: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST31:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST32:%.*]] = phi ptr [ [[TMP6]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[OMP_ARRAYCPY_BODY30]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST32]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST31]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST32]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST31]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY30]] +// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP8]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY37:%.*]] = icmp eq ptr [[TMP8]], [[TMP41]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY37]], label [[OMP_ARRAYCPY_DONE46:%.*]], label [[OMP_ARRAYCPY_BODY38:%.*]] +// CHECK1: omp.arraycpy.body38: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST39:%.*]] = phi ptr [ [[ARRS]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT44:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST40:%.*]] = phi ptr [ [[TMP8]], [[OMP_ARRAYCPY_DONE36]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT43:%.*]], [[OMP_ARRAYCPY_BODY38]] ] +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR41:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST40]], i64 4 +// CHECK1-NEXT: [[ADD_PTR42:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST39]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR41]], ptr noundef [[ADD_PTR42]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP43]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT43]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST40]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT44]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST39]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE45:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT43]], [[TMP41]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE45]], 
label [[OMP_ARRAYCPY_DONE46]], label [[OMP_ARRAYCPY_BODY38]] +// CHECK1: omp.arraycpy.done46: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN51:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS5]], i32 0, i32 0, i32 0 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN51]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN47:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN47]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN51]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE52:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done52: -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP56]]) -// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 
[[TMP58]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN47]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE48:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done48: +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP45]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP47]]) // CHECK1-NEXT: ret void // // @@ -1602,53 +1646,53 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x 
ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP15]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @.omp_combiner..10(ptr noundef [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP19]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP14]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY3:%.*]] = icmp eq ptr [[TMP14]], [[TMP16]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY3]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY4:%.*]] // CHECK1: omp.arraycpy.body4: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY4]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP19]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST5:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY4]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST6:%.*]] = phi ptr [ [[TMP14]], [[OMP_ARRAYCPY_DONE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT8:%.*]], [[OMP_ARRAYCPY_BODY4]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST6]], i64 4 // CHECK1-NEXT: [[ADD_PTR7:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR7]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT8]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST6]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT9]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST5]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP22]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE10:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT8]], [[TMP16]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY4]] // CHECK1: omp.arraycpy.done11: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1663,32 +1707,34 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR2]], ptr [[VAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP4]], i64 6 -// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] -// CHECK1-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP9:%.*]] = add nuw i64 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP9]], align 16 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP6]], i64 6 +// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca [[STRUCT_S_0]], i64 [[TMP11]], align 16 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] @@ -1698,76 +1744,76 @@ // CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP14]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP22]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP20]] // CHECK1-NEXT: store ptr [[_TMP7]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP26]], 9 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP7]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP34]]) -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP9]] to ptr -// CHECK1-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP40]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP42]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP35:%.*]] = inttoptr i64 [[TMP11]] to ptr +// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP37]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.16, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP43]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP39]] // 
CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -1777,48 +1823,48 @@ // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR12]], ptr noundef [[ADD_PTR13]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP43]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP39]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP9]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP48]] +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX1]], i64 [[TMP11]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] // CHECK1: omp.arraycpy.body18: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP50]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 // CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR21]], ptr noundef [[ADD_PTR22]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP50]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP48]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] // CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB3]], i32 [[TMP40]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP9]] -// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP55]] +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[VLA]], i64 [[TMP11]] +// CHECK1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP43]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP55]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[VLA]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done27: -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP56]]) -// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP58]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// 
CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP46]]) // CHECK1-NEXT: ret void // // @@ -1830,180 +1876,182 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[TMP7]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(60) [[VVAR2:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VVAR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VVAR22:%.*]] = alloca [5 x %struct.S.0], align 16 +// CHECK1-NEXT: [[VVAR2:%.*]] = alloca [5 x %struct.S.0], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[VVAR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VVAR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP0]], 
i64 0, i64 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[TMP2]], i64 0, i64 4 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP1]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP3]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VVAR22]], i64 [[TMP10]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP16]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VVAR2]], i64 [[TMP7]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: 
omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VVAR22]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP30]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VVAR2]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP22]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.18, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] 
-// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i64 4 -// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP31]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done13: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST6:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST7:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i64 4 +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST6]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR8]], ptr noundef [[ADD_PTR9]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST6]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP24]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label 
[[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done12: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE23:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK1: omp.arraycpy.body15: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[VVAR22]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i64 4 -// CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR18]], ptr noundef [[ADD_PTR19]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP38]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT21]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE22:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP36]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK1: omp.arraycpy.done23: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK1: omp.arraycpy.body14: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[VVAR2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR17:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i64 4 +// CHECK1-NEXT: [[ADD_PTR18:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR17]], ptr noundef [[ADD_PTR18]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP27]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT20]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq 
ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN24:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN24]], i64 5 +// CHECK1-NEXT: [[ARRAY_BEGIN23:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN23]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN24]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE25:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done25: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP45]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN23]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE24:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done24: +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP30]]) // CHECK1-NEXT: ret void // // @@ -2015,34 +2063,34 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP10]], i64 5 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP7]], i64 5 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2051,151 +2099,153 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR34:%.*]] = alloca [2 x %struct.S.0], align 16 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR3:%.*]] = alloca [2 x %struct.S.0], align 16 +// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP3]], i64 0, i64 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[TMP5]], i64 0, i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds 
i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VAR34]], i64 [[TMP14]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr ([[STRUCT_S_0]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[VAR3]], i64 [[TMP11]] +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VAR34]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP32]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP26]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.20, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: 
[[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE15:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST10:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i64 4 -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR12]], ptr noundef [[ADD_PTR13]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP35]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done16: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT13:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST10]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT13]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE14:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT13]], [[TMP28]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done15: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE26:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] -// CHECK1: omp.arraycpy.body18: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[VAR34]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY18]] ] -// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i64 4 
-// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR21]], ptr noundef [[ADD_PTR22]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP42]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT24]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE25:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_BODY18]] -// CHECK1: omp.arraycpy.done26: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP32]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAYIDX]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY16:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY16]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY17:%.*]] +// CHECK1: omp.arraycpy.body17: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST18:%.*]] = phi ptr [ [[VAR3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST19:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY17]] ] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i64 
4 +// CHECK1-NEXT: [[ADD_PTR21:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR20]], ptr noundef [[ADD_PTR21]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST19]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST18]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP29]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY17]] +// CHECK1: omp.arraycpy.done25: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP26]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN27:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN27]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN26:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[VAR3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN26]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP47]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN27]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE28:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done28: -// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP49]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN26]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE27:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done27: +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP34]]) // CHECK1-NEXT: ret void // // @@ -2207,34 +2257,34 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// 
CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP10]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP7]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void // // // 
CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..21 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(48) [[VAR3:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR3_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2248,130 +2298,132 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR3_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 48, ptr inttoptr (i64 6 to ptr)) +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[DOTVAR3__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP6]], i64 48, ptr inttoptr (i64 6 to ptr)) // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[DOTVAR3__VOID_ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 
// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP7]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] 
], [ [[TMP9]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[DOTVAR3__VOID_ADDR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.22, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP22]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[DOTVAR3__VOID_ADDR]], [[DOTOMP_REDUCTION_CASE1]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i64 4 // CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR10]], ptr noundef [[ADD_PTR11]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT12]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP22]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT12]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done14: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP2]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP2]], [[TMP27]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[TMP4]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY15:%.*]] = icmp eq ptr [[TMP4]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY15]], 
label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY16:%.*]] // CHECK1: omp.arraycpy.body16: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST17:%.*]] = phi ptr [ [[DOTVAR3__VOID_ADDR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST18:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY16]] ] +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[ADD_PTR19:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i64 4 // CHECK1-NEXT: [[ADD_PTR20:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR19]], ptr noundef [[ADD_PTR20]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST18]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST17]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP27]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label 
[[OMP_ARRAYCPY_BODY16]] // CHECK1: omp.arraycpy.done24: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: [[ARRAY_BEGIN25:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[DOTVAR3__VOID_ADDR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN25]], i64 4 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN25]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE26:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done26: -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTVAR3__VOID_ADDR]], ptr inttoptr (i64 6 to ptr)) -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP6]], ptr [[DOTVAR3__VOID_ADDR]], ptr inttoptr (i64 6 to ptr)) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ 
-2383,23 +2435,23 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP10]], i64 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP7]], i64 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // 
CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void @@ -2418,6 +2470,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ARR:%.*]] = alloca [42 x %struct.S], align 16 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiLi42EET_v.vec, i64 8, i1 false) @@ -2437,37 +2492,67 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined..23, ptr [[T_VAR]], ptr [[TMP1]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..31, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..35, ptr [[ARR]], ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[TMP3]]) +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN1:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN1]], i64 42 +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN3]], i64 42 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN1]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], 
ptr [[ARRAY_BEGIN3]], i64 2 -// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY4:%.*]] -// CHECK1: arraydestroy.body4: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST5:%.*]] = phi ptr [ [[TMP5]], [[ARRAYDESTROY_DONE2]] ], [ [[ARRAYDESTROY_ELEMENT6:%.*]], [[ARRAYDESTROY_BODY4]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT6]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST5]], i64 -1 -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT6]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE7:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT6]], [[ARRAY_BEGIN3]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE7]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY4]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK1-NEXT: br label [[ARRAYDESTROY_BODY6:%.*]] +// CHECK1: arraydestroy.body6: +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST7:%.*]] = phi ptr [ [[TMP19]], [[ARRAYDESTROY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT8:%.*]], [[ARRAYDESTROY_BODY6]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT8]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST7]], i64 -1 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT8]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAYDESTROY_DONE9:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT8]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE9]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY6]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[RETVAL]], align 4 +// 
CHECK1-NEXT: ret i32 [[TMP20]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -2476,10 +2561,10 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr @g, align 8 -// CHECK1-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float +// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fptrunc double [[TMP1]] to float // CHECK1-NEXT: store float [[CONV]], ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -2492,12 +2577,12 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store float [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load float, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP4]] to double -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr @g, align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP5]] +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP1]] to double +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr @g, align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP2]] // CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float // CHECK1-NEXT: store 
float [[CONV2]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -2526,16 +2611,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2544,134 +2624,136 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR17:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR18:%.*]] = alloca i32, align 4 
+// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @.omp_initializer..25(ptr noundef [[T_VAR3]], ptr noundef [[TMP0]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: call void @.omp_initializer..27(ptr noundef [[VAR17]], ptr noundef [[TMP2]]) -// CHECK1-NEXT: call void @.omp_initializer..29(ptr noundef [[T_VAR18]], ptr noundef [[TMP3]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 +// CHECK1-NEXT: call void @.omp_initializer..25(ptr noundef [[T_VAR]], ptr noundef [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..5(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR3]]) +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: call void @.omp_initializer..27(ptr noundef [[VAR1]], ptr noundef [[TMP6]]) +// CHECK1-NEXT: call void @.omp_initializer..29(ptr noundef [[T_VAR1]], ptr noundef [[TMP8]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP17]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK1-NEXT: store 
i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 -// 
CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX11]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX7]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP25]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP13]]) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR4]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR17]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR18]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP13]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP34]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.30, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: 
switch i32 [[TMP32]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) -// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: [[ADD_PTR9:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR9]], ptr noundef [[ADD_PTR10]]) +// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// 
CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 4 -// CHECK1-NEXT: [[ADD_PTR16:%.*]] = getelementptr inbounds i8, ptr [[VAR4]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR15]], ptr noundef [[ADD_PTR16]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP2]], ptr noundef [[VAR17]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP3]], ptr noundef [[T_VAR18]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR11:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 4 +// CHECK1-NEXT: [[ADD_PTR12:%.*]] = getelementptr inbounds i8, ptr [[VAR]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR11]], ptr noundef [[ADD_PTR12]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 
[[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP6]], ptr noundef [[VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP8]], ptr noundef [[T_VAR1]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP16]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR17]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR4]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -2773,41 +2855,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // 
CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 
8 -// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP10]], ptr noundef [[TMP7]]) -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 4 -// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..24(ptr noundef [[TMP7]], ptr noundef [[TMP5]]) +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 4 +// CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..4(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) -// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP22]], ptr noundef [[TMP19]]) -// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP28]], ptr noundef [[TMP25]]) +// CHECK1-NEXT: call void @.omp_combiner..26(ptr noundef [[TMP15]], ptr noundef [[TMP13]]) +// CHECK1-NEXT: call void @.omp_combiner..28(ptr noundef [[TMP19]], ptr noundef [[TMP17]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2816,96 +2895,98 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @.init.32, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr @.init.32, align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP16]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX5]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP21]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR3]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.33, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.33, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP25]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr 
@[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP0]], ptr noundef [[T_VAR3]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP2]], ptr noundef [[T_VAR]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -2917,12 +2998,12 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP10]], ptr noundef [[TMP7]]) +// CHECK1-NEXT: call void @.omp_combiner..34(ptr noundef [[TMP7]], ptr noundef [[TMP5]]) // CHECK1-NEXT: ret void // // @@ -2942,15 +3023,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(504) [[ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(24) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 @@ -2959,164 +3036,166 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[ARR4:%.*]] = alloca [40 x %struct.S], align 16 +// CHECK1-NEXT: [[ARR:%.*]] = alloca [40 x %struct.S], align 16 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], 
i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP0]], i64 0, i64 40 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[TMP2]], i64 0, i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[ARRAYIDX]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT6:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT5:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 -// CHECK1-NEXT: [[ADD_PTR5:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 -// CHECK1-NEXT: call void @.omp_initializer..37(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR5]]) +// CHECK1-NEXT: [[ADD_PTR4:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 +// CHECK1-NEXT: call void @.omp_initializer..37(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR4]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT6]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT6]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT5]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP12]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = sub i64 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[TMP15:%.*]] = sdiv exact i64 [[TMP14]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARR4]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr ([[STRUCT_S]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARR]], i64 [[TMP16]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 
4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP3]], i64 0, i64 [[IDXPROM9]] -// 
CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX10]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP29]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP8]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX9]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP28]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP33]]) -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARR4]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP37]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARR]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3]], i32 [[TMP35]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.38, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: 
[[TMP37:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST12:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i64 4 -// CHECK1-NEXT: [[ADD_PTR15:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR14]], ptr noundef [[ADD_PTR15]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST12]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP40]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST11:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[ADD_PTR13:%.*]] = getelementptr inbounds i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST12]], i64 4 +// CHECK1-NEXT: [[ADD_PTR14:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR13]], ptr noundef [[ADD_PTR14]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST11]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP37]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE28:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[ARR4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT26:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY20]] ] -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i64 4 -// 
CHECK1-NEXT: [[ADD_PTR24:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i64 4 -// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR23]], ptr noundef [[ADD_PTR24]]) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP47]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT25]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT26]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE27:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT25]], [[TMP45]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_DONE28]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done28: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAYIDX]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE27:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[ARR]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT25:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT24:%.*]], [[OMP_ARRAYCPY_BODY19]] ] +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[ADD_PTR22:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i64 4 +// 
CHECK1-NEXT: [[ADD_PTR23:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i64 4 +// CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR22]], ptr noundef [[ADD_PTR23]]) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP40]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT24]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT25]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE26:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT24]], [[TMP38]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE26]], label [[OMP_ARRAYCPY_DONE27]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done27: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP35]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[ARRAY_BEGIN29:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN29]], i64 40 +// CHECK1-NEXT: [[ARRAY_BEGIN28:%.*]] = getelementptr inbounds [40 x %struct.S], ptr [[ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN28]], i64 40 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP52]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_REDUCTION_DEFAULT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN29]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE30:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done30: -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP54]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN28]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE29:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done29: +// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP43]]) // CHECK1-NEXT: ret void // // @@ -3154,23 +3233,23 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] 
= getelementptr [[STRUCT_S:%.*]], ptr [[TMP10]], i64 40 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[TMP7]], i64 40 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i64 4 // CHECK1-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4 // CHECK1-NEXT: call void @.omp_combiner..36(ptr noundef [[ADD_PTR]], ptr noundef [[ADD_PTR2]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done3: // CHECK1-NEXT: ret void @@ -3184,12 +3263,12 @@ // CHECK1-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK1-NEXT: [[TMP5:%.*]] = load volatile double, ptr @g, align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP5]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr @g, align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP2]] // CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -3208,23 +3287,23 @@ // CHECK3-NEXT: call void @.omp_initializer.(ptr noundef [[S1]], ptr noundef [[S]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP1]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 @@ -3351,170 +3430,170 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE8]], label [[ARRAYCTOR_CONT9:%.*]], label [[ARRAYCTOR_LOOP5]] // CHECK3: arrayctor.cont9: // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[VAR3]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP10]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP10]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: br label [[FOR_COND:%.*]] // CHECK3: for.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: 
[[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 // CHECK3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK3: for.body: -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i32 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK3-NEXT: [[TMP3:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fptosi float [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] // CHECK3-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP10]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP10]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP6]] to i64 // CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM11]] -// CHECK3-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX12]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP6]]) +// CHECK3-NEXT: [[CALL13:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX12]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP5]]) // CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: for.end: // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = call ptr @llvm.stacksave() -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[SAVED_STACK]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nuw i64 10, [[TMP10]] -// CHECK3-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP12]], align 16 -// CHECK3-NEXT: store i64 [[TMP10]], ptr [[__VLA_EXPR0]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = mul nuw i64 10, [[TMP9]] +// CHECK3-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP11]], align 16 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I15]], align 4 // CHECK3-NEXT: br label [[FOR_COND16:%.*]] // CHECK3: for.cond16: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I15]], align 4 -// CHECK3-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP13]], 10 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I15]], align 4 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp slt i32 [[TMP12]], 10 // CHECK3-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END25:%.*]] // CHECK3: for.body18: -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP10]] -// CHECK3-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I15]], align 4 -// CHECK3-NEXT: [[IDXPROM20:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP9]] +// 
CHECK3-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I15]], align 4 +// CHECK3-NEXT: [[IDXPROM20:%.*]] = sext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX19]], i64 [[IDXPROM20]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX21]], align 4 -// CHECK3-NEXT: [[INC22:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX21]], align 4 +// CHECK3-NEXT: [[INC22:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[INC22]], ptr [[ARRAYIDX21]], align 4 // CHECK3-NEXT: br label [[FOR_INC23:%.*]] // CHECK3: for.inc23: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I15]], align 4 -// CHECK3-NEXT: [[INC24:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I15]], align 4 +// CHECK3-NEXT: [[INC24:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[INC24]], ptr [[I15]], align 4 // CHECK3-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: for.end25: // CHECK3-NEXT: store i32 0, ptr [[I26]], align 4 // CHECK3-NEXT: br label [[FOR_COND27:%.*]] // CHECK3: for.cond27: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I26]], align 4 -// CHECK3-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP18]], 10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I26]], align 4 +// CHECK3-NEXT: [[CMP28:%.*]] = icmp slt i32 [[TMP17]], 10 // CHECK3-NEXT: br i1 [[CMP28]], label [[FOR_BODY29:%.*]], label [[FOR_END36:%.*]] // CHECK3: for.body29: -// CHECK3-NEXT: [[TMP19:%.*]] = mul nsw i64 1, [[TMP10]] -// CHECK3-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I26]], align 4 -// CHECK3-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK3-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP18]] 
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I26]], align 4 +// CHECK3-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP19]] to i64 // CHECK3-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX30]], i64 [[IDXPROM31]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX32]], align 4 -// CHECK3-NEXT: [[INC33:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX32]], align 4 +// CHECK3-NEXT: [[INC33:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[INC33]], ptr [[ARRAYIDX32]], align 4 // CHECK3-NEXT: br label [[FOR_INC34:%.*]] // CHECK3: for.inc34: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I26]], align 4 -// CHECK3-NEXT: [[INC35:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I26]], align 4 +// CHECK3-NEXT: [[INC35:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[INC35]], ptr [[I26]], align 4 // CHECK3-NEXT: br label [[FOR_COND27]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: for.end36: // CHECK3-NEXT: store i32 0, ptr [[I37]], align 4 // CHECK3-NEXT: br label [[FOR_COND38:%.*]] // CHECK3: for.cond38: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I37]], align 4 -// CHECK3-NEXT: [[CMP39:%.*]] = icmp slt i32 [[TMP23]], 10 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I37]], align 4 +// CHECK3-NEXT: [[CMP39:%.*]] = icmp slt i32 [[TMP22]], 10 // CHECK3-NEXT: br i1 [[CMP39]], label [[FOR_BODY40:%.*]], label [[FOR_END43:%.*]] // CHECK3: for.body40: // CHECK3-NEXT: br label [[FOR_INC41:%.*]] // CHECK3: for.inc41: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I37]], align 4 -// CHECK3-NEXT: [[INC42:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I37]], align 4 +// CHECK3-NEXT: [[INC42:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[INC42]], ptr [[I37]], align 4 // CHECK3-NEXT: br label [[FOR_COND38]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: for.end43: // CHECK3-NEXT: store i32 0, ptr [[I44]], align 4 // 
CHECK3-NEXT: br label [[FOR_COND45:%.*]] // CHECK3: for.cond45: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I44]], align 4 -// CHECK3-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP25]], 10 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I44]], align 4 +// CHECK3-NEXT: [[CMP46:%.*]] = icmp slt i32 [[TMP24]], 10 // CHECK3-NEXT: br i1 [[CMP46]], label [[FOR_BODY47:%.*]], label [[FOR_END50:%.*]] // CHECK3: for.body47: // CHECK3-NEXT: br label [[FOR_INC48:%.*]] // CHECK3: for.inc48: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I44]], align 4 -// CHECK3-NEXT: [[INC49:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I44]], align 4 +// CHECK3-NEXT: [[INC49:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK3-NEXT: store i32 [[INC49]], ptr [[I44]], align 4 // CHECK3-NEXT: br label [[FOR_COND45]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: for.end50: -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK3-NEXT: store ptr [[TMP27]], ptr [[_TMP51]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP51]], align 8 -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[_TMP52]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK3-NEXT: store ptr [[TMP26]], ptr [[_TMP51]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP51]], align 8 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[_TMP52]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I53]], align 4 // CHECK3-NEXT: br label [[FOR_COND54:%.*]] // CHECK3: for.cond54: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I53]], align 4 -// CHECK3-NEXT: [[CMP55:%.*]] = icmp slt i32 [[TMP29]], 10 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I53]], align 4 +// CHECK3-NEXT: [[CMP55:%.*]] = icmp slt i32 [[TMP28]], 10 // CHECK3-NEXT: br i1 [[CMP55]], label [[FOR_BODY56:%.*]], label [[FOR_END59:%.*]] // CHECK3: for.body56: // CHECK3-NEXT: br label [[FOR_INC57:%.*]] // CHECK3: for.inc57: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I53]], align 4 -// 
CHECK3-NEXT: [[INC58:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I53]], align 4 +// CHECK3-NEXT: [[INC58:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[INC58]], ptr [[I53]], align 4 // CHECK3-NEXT: br label [[FOR_COND54]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: for.end59: -// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[VAR3]], align 8 -// CHECK3-NEXT: store ptr [[TMP31]], ptr [[_TMP60]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP60]], align 8 -// CHECK3-NEXT: store ptr [[TMP32]], ptr [[_TMP61]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[VAR3]], align 8 +// CHECK3-NEXT: store ptr [[TMP30]], ptr [[_TMP60]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP60]], align 8 +// CHECK3-NEXT: store ptr [[TMP31]], ptr [[_TMP61]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I62]], align 4 // CHECK3-NEXT: br label [[FOR_COND63:%.*]] // CHECK3: for.cond63: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I62]], align 4 -// CHECK3-NEXT: [[CMP64:%.*]] = icmp slt i32 [[TMP33]], 10 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[I62]], align 4 +// CHECK3-NEXT: [[CMP64:%.*]] = icmp slt i32 [[TMP32]], 10 // CHECK3-NEXT: br i1 [[CMP64]], label [[FOR_BODY65:%.*]], label [[FOR_END68:%.*]] // CHECK3: for.body65: // CHECK3-NEXT: br label [[FOR_INC66:%.*]] // CHECK3: for.inc66: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[I62]], align 4 -// CHECK3-NEXT: [[INC67:%.*]] = add nsw i32 [[TMP34]], 1 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[I62]], align 4 +// CHECK3-NEXT: [[INC67:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[INC67]], ptr [[I62]], align 4 // CHECK3-NEXT: br label [[FOR_COND63]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: for.end68: // CHECK3-NEXT: [[CALL69:%.*]] = call noundef i32 @_Z5tmainIiLi42EET_v() // CHECK3-NEXT: store i32 [[CALL69]], ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK3-NEXT: call 
void @llvm.stackrestore(ptr [[TMP35]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK3-NEXT: call void @llvm.stackrestore(ptr [[TMP34]]) // CHECK3-NEXT: [[ARRAY_BEGIN70:%.*]] = getelementptr inbounds [5 x %struct.S.0], ptr [[VVAR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN70]], i64 5 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN70]], i64 5 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[FOR_END68]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[FOR_END68]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN70]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE71:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK3: arraydestroy.done71: // CHECK3-NEXT: [[ARRAY_BEGIN72:%.*]] = getelementptr inbounds [10 x [4 x %struct.S.0]], ptr [[ARRS]], i32 0, i32 0, i32 0 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN72]], i64 40 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN72]], i64 40 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY73:%.*]] // CHECK3: arraydestroy.body73: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST74:%.*]] = phi ptr [ [[TMP37]], [[ARRAYDESTROY_DONE71]] ], [ [[ARRAYDESTROY_ELEMENT75:%.*]], [[ARRAYDESTROY_BODY73]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST74:%.*]] = phi ptr [ [[TMP36]], 
[[ARRAYDESTROY_DONE71]] ], [ [[ARRAYDESTROY_ELEMENT75:%.*]], [[ARRAYDESTROY_BODY73]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT75]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST74]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT75]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE76:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT75]], [[ARRAY_BEGIN72]] @@ -3522,18 +3601,18 @@ // CHECK3: arraydestroy.done77: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAY_BEGIN78:%.*]] = getelementptr inbounds [4 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN78]], i64 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN78]], i64 4 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY79:%.*]] // CHECK3: arraydestroy.body79: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST80:%.*]] = phi ptr [ [[TMP38]], [[ARRAYDESTROY_DONE77]] ], [ [[ARRAYDESTROY_ELEMENT81:%.*]], [[ARRAYDESTROY_BODY79]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST80:%.*]] = phi ptr [ [[TMP37]], [[ARRAYDESTROY_DONE77]] ], [ [[ARRAYDESTROY_ELEMENT81:%.*]], [[ARRAYDESTROY_BODY79]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT81]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST80]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT81]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE82:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT81]], [[ARRAY_BEGIN78]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE82]], label [[ARRAYDESTROY_DONE83:%.*]], label [[ARRAYDESTROY_BODY79]] // CHECK3: arraydestroy.done83: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR7]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr 
[[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP39]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP38]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -3600,96 +3679,96 @@ // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: br label [[FOR_COND:%.*]] // CHECK3: for.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 2 // CHECK3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK3: for.body: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP6]] to i64 // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] -// CHECK3-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP6]]) +// CHECK3-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP5]]) // CHECK3-NEXT: br label [[FOR_INC:%.*]] // CHECK3: for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: for.end: -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[_TMP5]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I6]], align 4 // CHECK3-NEXT: br label [[FOR_COND7:%.*]] // CHECK3: for.cond7: -// CHECK3-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP11]], 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 [[TMP10]], 2 // CHECK3-NEXT: br i1 [[CMP8]], label [[FOR_BODY9:%.*]], label [[FOR_END17:%.*]] // CHECK3: for.body9: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK3-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK3-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM10]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX11]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK3-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK3-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM12]] -// CHECK3-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX13]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP14]]) +// CHECK3-NEXT: [[CALL14:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX13]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP13]]) // CHECK3-NEXT: br label [[FOR_INC15:%.*]] // CHECK3: for.inc15: -// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK3-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK3-NEXT: [[INC16:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[INC16]], ptr [[I6]], align 4 // CHECK3-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: for.end17: -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[_TMP18]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP18]], align 8 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[_TMP19]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[_TMP18]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP18]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[_TMP19]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I20]], align 4 // CHECK3-NEXT: br label [[FOR_COND21:%.*]] // CHECK3: for.cond21: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I20]], align 4 -// CHECK3-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP19]], 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I20]], align 4 +// CHECK3-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP18]], 2 // CHECK3-NEXT: br i1 [[CMP22]], label [[FOR_BODY23:%.*]], label [[FOR_END31:%.*]] // CHECK3: for.body23: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I20]], align 4 -// CHECK3-NEXT: [[IDXPROM24:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I20]], align 4 +// CHECK3-NEXT: [[IDXPROM24:%.*]] = sext i32 [[TMP20]] to i64 // CHECK3-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM24]] -// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX25]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP19]], align 8 -// CHECK3-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[I20]], align 4 -// CHECK3-NEXT: [[IDXPROM26:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX25]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I20]], align 4 +// CHECK3-NEXT: [[IDXPROM26:%.*]] = sext i32 [[TMP22]] to i64 // CHECK3-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM26]] -// CHECK3-NEXT: [[CALL28:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX27]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP22]]) +// CHECK3-NEXT: [[CALL28:%.*]] = call noundef nonnull align 4 dereferenceable(12) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYIDX27]], ptr noundef nonnull align 4 dereferenceable(12) [[TMP21]]) // CHECK3-NEXT: br label [[FOR_INC29:%.*]] // CHECK3: for.inc29: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[I20]], align 4 -// CHECK3-NEXT: [[INC30:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I20]], align 4 +// CHECK3-NEXT: [[INC30:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[INC30]], ptr [[I20]], align 4 // CHECK3-NEXT: br label [[FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: for.end31: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN32:%.*]] = getelementptr inbounds [42 x %struct.S], ptr [[ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN32]], i64 42 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN32]], i64 42 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[FOR_END31]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// 
CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[FOR_END31]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN32]] @@ -3697,18 +3776,18 @@ // CHECK3: arraydestroy.done33: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[VAR1]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAY_BEGIN34:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN34]], i64 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN34]], i64 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY35:%.*]] // CHECK3: arraydestroy.body35: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST36:%.*]] = phi ptr [ [[TMP26]], [[ARRAYDESTROY_DONE33]] ], [ [[ARRAYDESTROY_ELEMENT37:%.*]], [[ARRAYDESTROY_BODY35]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST36:%.*]] = phi ptr [ [[TMP25]], [[ARRAYDESTROY_DONE33]] ], [ [[ARRAYDESTROY_ELEMENT37:%.*]], [[ARRAYDESTROY_BODY35]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT37]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST36]], i64 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[ARRAYDESTROY_ELEMENT37]]) #[[ATTR7]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE38:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT37]], [[ARRAY_BEGIN34]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE38]], label [[ARRAYDESTROY_DONE39:%.*]], label [[ARRAYDESTROY_BODY35]] // CHECK3: arraydestroy.done39: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(12) [[TEST]]) #[[ATTR7]] -// CHECK3-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP27]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP26]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZN1SIfED1Ev @@ -3727,10 +3806,10 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr @g, align 8 -// CHECK3-NEXT: [[CONV:%.*]] = fptosi double [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 +// CHECK3-NEXT: [[CONV:%.*]] = fptosi double [[TMP1]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[F]], align 4 // CHECK3-NEXT: ret void // @@ -3750,10 +3829,10 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load volatile double, ptr @g, align 8 -// CHECK3-NEXT: [[CONV:%.*]] = fptrunc double [[TMP4]] to float +// CHECK3-NEXT: [[TMP1:%.*]] = load volatile double, ptr @g, align 8 +// CHECK3-NEXT: [[CONV:%.*]] = fptrunc double [[TMP1]] to float // CHECK3-NEXT: store float [[CONV]], ptr [[F]], align 4 // CHECK3-NEXT: ret void // @@ -3775,12 +3854,12 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store float 
[[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP4]] to double -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr @g, align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP5]] +// CHECK3-NEXT: [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP1]] to double +// CHECK3-NEXT: [[TMP2:%.*]] = load volatile double, ptr @g, align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP2]] // CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float // CHECK3-NEXT: store float [[CONV2]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -3807,12 +3886,12 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4 // CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile double, ptr @g, align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP5]] +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: [[TMP2:%.*]] = load volatile double, ptr @g, align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd 
double [[CONV]], [[TMP2]] // CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV2]], ptr [[F]], align 4 // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -41,287 +41,293 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 
+// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// 
CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = 
getelementptr i8, ptr [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 
5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: 
store ptr @.red_comb., ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP37]] -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP40]], align 8 -// 
CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] // CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 // CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP46]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 
0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP51]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP55]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP57]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP58]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: 
br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP59]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP60]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP61]], [[TMP62]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP63]], 1 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[TMP73]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP74]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP70]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP76]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP78]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP80]], ptr [[TMP70]]) +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// 
CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[TMP71]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP72]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP77]], ptr [[TMP69]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP82:%.*]] = 
load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP82]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP79]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP84]]) -// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP86]], i32 1) -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP87]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP91]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP92]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP93]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP95]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP81]]) +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP83]], i32 1) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP87:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP89]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP90]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: 
[[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP91]], [[TMP92]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: 
[[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP93]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP101]] monotonic, align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: 
omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP104]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP96]] monotonic, align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, 
ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 -// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 -// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP100:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP105:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP100]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP100]], i8 [[TMP103]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP105]] = extractvalue { i8, i1 } [[TMP104]], 0 +// CHECK1-NEXT: [[TMP106:%.*]] = extractvalue { i8, i1 } [[TMP104]], 1 +// CHECK1-NEXT: br i1 [[TMP106]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP93]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP114:%.*]] = load i32, ptr [[TMP113]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP114]]) +// CHECK1-NEXT: [[TMP107:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP107]]) +// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = load i32, ptr [[TMP108]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP109]]) // 
CHECK1-NEXT: ret void // // @@ -332,8 +338,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -344,12 +350,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -444,59 +450,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], 
ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: 
[[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -508,38 +514,38 @@ // 
CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] 
= add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], 
align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/interop_irbuilder.cpp b/clang/test/OpenMP/interop_irbuilder.cpp --- a/clang/test/OpenMP/interop_irbuilder.cpp +++ b/clang/test/OpenMP/interop_irbuilder.cpp @@ -98,26 +98,26 @@ // CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: call void @__tgt_interop_use(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4]], ptr [[INTEROP]], i32 -1, i32 2, ptr [[TMP2]], i32 1) -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 -// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[D0]] to i64 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i64 0 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 1 -// CHECK-NEXT: store i64 4, ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 2 -// CHECK-NEXT: store i8 1, ptr [[TMP19]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[D1]] to i64 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i64 1 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 1 -// CHECK-NEXT: store i64 4, ptr [[TMP23]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 2 -// CHECK-NEXT: store i8 1, ptr [[TMP24]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[D0]] to i64 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, ptr [[TMP17]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[D1]] to i64 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 1 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP19]], ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 1 +// 
CHECK-NEXT: store i64 4, ptr [[TMP22]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, ptr [[TMP23]], align 8 // CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR6]], align 8 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], ptr [[INTEROP]], i32 -1, i32 2, ptr [[TMP14]], i32 0) +// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM7]], ptr [[INTEROP]], i32 -1, i32 2, ptr [[TMP13]], i32 0) // CHECK-NEXT: ret void // // @@ -169,26 +169,26 @@ // CHECK-NEXT: [[INTEROP8:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: call void @__tgt_interop_use(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9]], ptr [[INTEROP8]], i32 -1, i32 2, ptr [[TMP2]], i32 1) -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR10]], i64 0, i64 0 -// CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[D0]] to i64 -// CHECK-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i64 0 -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 1 -// CHECK-NEXT: store i64 4, ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 2 -// CHECK-NEXT: store i8 1, ptr [[TMP19]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[D1]] to i64 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP14]], i64 1 
-// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 0 -// CHECK-NEXT: store i64 [[TMP20]], ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 1 -// CHECK-NEXT: store i64 4, ptr [[TMP23]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP21]], i32 0, i32 2 -// CHECK-NEXT: store i8 1, ptr [[TMP24]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR10]], i64 0, i64 0 +// CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[D0]] to i64 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 0 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, ptr [[TMP17]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, ptr [[TMP18]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[D1]] to i64 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP13]], i64 1 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 0 +// CHECK-NEXT: store i64 [[TMP19]], ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 1 +// CHECK-NEXT: store i64 4, ptr [[TMP22]], align 8 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 2 +// CHECK-NEXT: store i8 1, ptr [[TMP23]], align 8 // CHECK-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR11]], align 8 // 
CHECK-NEXT: [[INTEROP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[THIS1]], i32 0, i32 0 // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], ptr [[INTEROP12]], i32 -1, i32 2, ptr [[TMP14]], i32 0) +// CHECK-NEXT: call void @__tgt_interop_destroy(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13]], ptr [[INTEROP12]], i32 -1, i32 2, ptr [[TMP13]], i32 0) // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_auto.c b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_auto.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_auto.c @@ -5,96 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_auto( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 33, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: 
store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 1, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: store i32 %[[DOTCOUNT]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741862, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[UB:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds 
float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: -// CHECK-NEXT: %[[TMP14:.+]] = call i32 
@__kmpc_dispatch_next_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]]) -// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 -// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_auto(float *a, float *b, float *c, float *d) { #pragma omp for schedule(auto) @@ -105,69 +15,150 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 32000000, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: 
%[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} 
-// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_auto +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 1, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store 
i32 [[DOTCOUNT]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741862, i32 1, i32 [[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER_OUTER_COND:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ [[LB:%.*]], [[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[UB:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[UB]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_PREHEADER_OUTER_COND]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 
+// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// CHECK: omp_loop.preheader.outer.cond: +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]]) +// CHECK-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[LB]] = sub i32 [[TMP16]], 1 +// CHECK-NEXT: br i1 [[TMP15]], label [[OMP_LOOP_HEADER]], label [[OMP_LOOP_EXIT:%.*]] +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: 
[[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32000000, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_down.c b/clang/test/OpenMP/irbuilder_for_unsigned_down.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_down.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_down.c @@ -5,74 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_down( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr 
%[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store i32 32000000, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 0) -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = sub i32 %[[TMP5]], %[[TMP4]] -// CHECK-NEXT: %[[TMP7:.+]] = add i32 %[[TMP6]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[TMP7]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label 
%[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: %[[TMP8:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP4]] -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP8]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = uitofp i32 %[[TMP9]] to float -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP11:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP11]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP10]], i64 %[[IDXPROM]] -// CHECK-NEXT: store float %[[CONV]], ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_down(float *a) { #pragma omp for @@ -82,77 +14,132 @@ } #endif // HEADER -// -// -// -// -// -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: 
store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 33, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 -7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ugt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub nsw i32 0, %[[TMP8]] -// CHECK-NEXT: %[[SUB2:.+]] = sub i32 %[[SUB1]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB2]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i32 0, %[[TMP9]] -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[SUB3]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: 
[[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 -7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_down +// CHECK-SAME: (ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store i32 32000000, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]] +// CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP7]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP4]] +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], 
i32 [[TMP8]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = uitofp i32 [[TMP9]] to float +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 [[IDXPROM]] +// CHECK-NEXT: store float [[CONV]], ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 33, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 -7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 0, [[TMP8]] +// CHECK-NEXT: [[SUB2:%.*]] = sub i32 [[SUB1]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB2]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 0, [[TMP9]] +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[SUB3]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// 
CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 -7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic.c @@ -5,96 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_dynamic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 33, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 1, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: store i32 %[[DOTCOUNT]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[UB:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], 
align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// 
CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: -// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]]) -// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 -// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_dynamic(float *a, float *b, float *c, float *d) { #pragma omp for schedule(dynamic) @@ -105,69 +15,150 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 32000000, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 
%[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr 
%[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_dynamic +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, 
ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 1, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 [[DOTCOUNT]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 [[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER_OUTER_COND:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ [[LB:%.*]], [[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[UB:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[UB]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_PREHEADER_OUTER_COND]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 
[[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// CHECK: omp_loop.preheader.outer.cond: +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]]) +// CHECK-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[LB]] = sub i32 [[TMP16]], 1 +// CHECK-NEXT: br i1 [[TMP15]], label [[OMP_LOOP_HEADER]], label [[OMP_LOOP_EXIT:%.*]] +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32000000, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: 
store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_dynamic_chunked.c @@ -5,96 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_dynamic_chunked( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca 
%struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 33, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 1, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: store i32 %[[DOTCOUNT]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 5) -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ 
%[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[UB:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr 
inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: -// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]]) -// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 -// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_dynamic_chunked(float *a, float *b, float *c, float *d) { #pragma omp for schedule(dynamic, 5) @@ -105,69 +15,150 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// 
CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 32000000, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], 
align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_dynamic_chunked +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 
8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 1, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 [[DOTCOUNT]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741859, i32 1, i32 [[DOTCOUNT]], i32 1, i32 5) +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER_OUTER_COND:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ [[LB:%.*]], [[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[UB:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[UB]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_PREHEADER_OUTER_COND]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 
+// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// CHECK: omp_loop.preheader.outer.cond: +// CHECK-NEXT: [[TMP14:%.*]] = call i32 
@__kmpc_dispatch_next_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]]) +// CHECK-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[LB]] = sub i32 [[TMP16]], 1 +// CHECK-NEXT: br i1 [[TMP15]], label [[OMP_LOOP_HEADER]], label [[OMP_LOOP_EXIT:%.*]] +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32000000, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: 
[[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c 
b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_runtime.c @@ -5,96 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_runtime( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 33, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 1, ptr 
%[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: store i32 %[[DOTCOUNT]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1073741861, i32 1, i32 %[[DOTCOUNT]], i32 1, i32 1) -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER_OUTER_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER:.*]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ %[[LB:.+]], %[[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[UB:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[UB]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_PREHEADER_OUTER_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT:.*]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER_OUTER_COND]]: -// CHECK-NEXT: %[[TMP14:.+]] = call i32 @__kmpc_dispatch_next_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]]) -// CHECK-NEXT: %[[TMP15:.+]] = icmp ne i32 %[[TMP14]], 0 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[LB]] = sub i32 %[[TMP16]], 1 -// CHECK-NEXT: br i1 %[[TMP15]], label %[[OMP_LOOP_HEADER]], label %[[OMP_LOOP_EXIT]] -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_runtime(float *a, float *b, float *c, float *d) { #pragma omp for schedule(runtime) @@ -105,69 +15,150 @@ #endif 
// HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 32000000, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_runtime +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 1, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: store i32 [[DOTCOUNT]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_dispatch_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 1073741861, i32 1, i32 [[DOTCOUNT]], i32 1, i32 1) +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER_OUTER_COND:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ [[LB:%.*]], [[OMP_LOOP_PREHEADER_OUTER_COND]] ], [ 
[[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[UB:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[UB]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_PREHEADER_OUTER_COND]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], 
ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// CHECK: omp_loop.preheader.outer.cond: +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]]) +// CHECK-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[LB]] = sub i32 [[TMP16]], 1 +// CHECK-NEXT: br i1 [[TMP15]], label [[OMP_LOOP_HEADER]], label [[OMP_LOOP_EXIT:%.*]] +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: 
store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32000000, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c --- a/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c +++ b/clang/test/OpenMP/irbuilder_for_unsigned_static_chunked.c @@ -5,137 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@workshareloop_unsigned_static_chunked( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 33, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr 
%[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = sub i32 %[[DOTCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 33, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 5) -// CHECK-NEXT: %[[OMP_FIRSTCHUNK_LB:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[OMP_FIRSTCHUNK_UB:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = add i32 %[[OMP_FIRSTCHUNK_UB]], 1 -// CHECK-NEXT: %[[OMP_CHUNK_RANGE:.+]] = sub i32 %[[TMP4]], %[[OMP_FIRSTCHUNK_LB]] -// CHECK-NEXT: %[[OMP_DISPATCH_STRIDE:.+]] = load i32, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = sub nuw i32 %[[DOTCOUNT]], %[[OMP_FIRSTCHUNK_LB]] -// CHECK-NEXT: %[[TMP6:.+]] = icmp ule i32 %[[DOTCOUNT]], %[[OMP_FIRSTCHUNK_LB]] -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[TMP5]], 1 -// CHECK-NEXT: %[[TMP8:.+]] = udiv i32 %[[TMP7]], %[[OMP_DISPATCH_STRIDE]] -// CHECK-NEXT: %[[TMP9:.+]] = add i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[TMP10:.+]] = icmp ule i32 %[[TMP5]], %[[OMP_DISPATCH_STRIDE]] -// CHECK-NEXT: %[[TMP11:.+]] = select i1 
%[[TMP10]], i32 1, i32 %[[TMP9]] -// CHECK-NEXT: %[[OMP_DISPATCH_TRIPCOUNT:.+]] = select i1 %[[TMP6]], i32 0, i32 %[[TMP11]] -// CHECK-NEXT: br label %[[OMP_DISPATCH_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_DISPATCH_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_HEADER]]: -// CHECK-NEXT: %[[OMP_DISPATCH_IV:.+]] = phi i32 [ 0, %[[OMP_DISPATCH_PREHEADER]] ], [ %[[OMP_DISPATCH_NEXT:.+]], %[[OMP_DISPATCH_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_DISPATCH_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_COND]]: -// CHECK-NEXT: %[[OMP_DISPATCH_CMP:.+]] = icmp ult i32 %[[OMP_DISPATCH_IV]], %[[OMP_DISPATCH_TRIPCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_DISPATCH_CMP]], label %[[OMP_DISPATCH_BODY:.+]], label %[[OMP_DISPATCH_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = mul i32 %[[OMP_DISPATCH_IV]], %[[OMP_DISPATCH_STRIDE]] -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[TMP12]], %[[OMP_FIRSTCHUNK_LB]] -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER9:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_INC]]: -// CHECK-NEXT: %[[OMP_DISPATCH_NEXT]] = add nuw i32 %[[OMP_DISPATCH_IV]], 1 -// CHECK-NEXT: br label %[[OMP_DISPATCH_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM10:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM10]]) -// CHECK-NEXT: br label %[[OMP_DISPATCH_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_DISPATCH_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER9]]: -// CHECK-NEXT: %[[TMP14:.+]] = add i32 %[[TMP13]], %[[OMP_CHUNK_RANGE]] -// CHECK-NEXT: %[[OMP_CHUNK_IS_LAST:.+]] = icmp uge i32 %[[TMP14]], %[[DOTCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = 
sub i32 %[[DOTCOUNT]], %[[TMP13]] -// CHECK-NEXT: %[[OMP_CHUNK_TRIPCOUNT:.+]] = select i1 %[[OMP_CHUNK_IS_LAST]], i32 %[[TMP15]], i32 %[[OMP_CHUNK_RANGE]] -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER9]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[OMP_CHUNK_TRIPCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = add i32 %[[OMP_LOOP_IV]], %[[TMP13]] -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP16]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = zext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = zext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = zext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: 
%[[TMP25:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = zext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP26]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_DISPATCH_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } extern "C" void workshareloop_unsigned_static_chunked(float *a, float *b, float *c, float *d) { #pragma omp for schedule(static, 5) @@ -146,69 +15,184 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 32000000, ptr %[[DOTSTOP]], align 4 -// 
CHECK-NEXT: store i32 7, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp ult i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: 
%[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 7, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 45} -// CHECK: ![[META2:[0-9]+]] = +// CHECK-LABEL: define {{[^@]+}}@workshareloop_unsigned_static_chunked +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 33, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 33, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 5) +// CHECK-NEXT: [[OMP_FIRSTCHUNK_LB:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[OMP_FIRSTCHUNK_UB:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OMP_FIRSTCHUNK_UB]], 1 +// CHECK-NEXT: [[OMP_CHUNK_RANGE:%.*]] = sub i32 [[TMP4]], [[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: [[OMP_DISPATCH_STRIDE:%.*]] = load i32, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = sub nuw i32 [[DOTCOUNT]], [[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: [[TMP6:%.*]] = icmp ule i32 [[DOTCOUNT]], [[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], 1 +// CHECK-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1 +// CHECK-NEXT: [[TMP10:%.*]] = icmp ule i32 [[TMP5]], [[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 1, i32 [[TMP9]] +// CHECK-NEXT: [[OMP_DISPATCH_TRIPCOUNT:%.*]] = select i1 [[TMP6]], i32 0, i32 [[TMP11]] +// CHECK-NEXT: br label [[OMP_DISPATCH_PREHEADER:%.*]] +// CHECK: 
omp_dispatch.preheader: +// CHECK-NEXT: br label [[OMP_DISPATCH_HEADER:%.*]] +// CHECK: omp_dispatch.header: +// CHECK-NEXT: [[OMP_DISPATCH_IV:%.*]] = phi i32 [ 0, [[OMP_DISPATCH_PREHEADER]] ], [ [[OMP_DISPATCH_NEXT:%.*]], [[OMP_DISPATCH_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK: omp_dispatch.cond: +// CHECK-NEXT: [[OMP_DISPATCH_CMP:%.*]] = icmp ult i32 [[OMP_DISPATCH_IV]], [[OMP_DISPATCH_TRIPCOUNT]] +// CHECK-NEXT: br i1 [[OMP_DISPATCH_CMP]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_EXIT:%.*]] +// CHECK: omp_dispatch.body: +// CHECK-NEXT: [[TMP12:%.*]] = mul i32 [[OMP_DISPATCH_IV]], [[OMP_DISPATCH_STRIDE]] +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP12]], [[OMP_FIRSTCHUNK_LB]] +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER9:%.*]] +// CHECK: omp_dispatch.inc: +// CHECK-NEXT: [[OMP_DISPATCH_NEXT]] = add nuw i32 [[OMP_DISPATCH_IV]], 1 +// CHECK-NEXT: br label [[OMP_DISPATCH_HEADER]] +// CHECK: omp_dispatch.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM10]]) +// CHECK-NEXT: br label [[OMP_DISPATCH_AFTER:%.*]] +// CHECK: omp_dispatch.after: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.preheader9: +// CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], [[OMP_CHUNK_RANGE]] +// CHECK-NEXT: [[OMP_CHUNK_IS_LAST:%.*]] = icmp uge i32 [[TMP14]], [[DOTCOUNT]] +// CHECK-NEXT: [[TMP15:%.*]] = sub i32 [[DOTCOUNT]], [[TMP13]] +// CHECK-NEXT: [[OMP_CHUNK_TRIPCOUNT:%.*]] = select i1 [[OMP_CHUNK_IS_LAST]], i32 [[TMP15]], i32 [[OMP_CHUNK_RANGE]] +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER9]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label 
[[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[OMP_CHUNK_TRIPCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP13]] +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP16]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP25]] +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label 
[[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_DISPATCH_INC]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 32000000, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 7, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 
[[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 7, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ 
-203,12 +203,12 @@ // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[R:%.*]], ptr [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], ptr [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG127:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -217,60 +217,60 @@ // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr 
[[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG128:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[DBG135]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg 
[[DBG136:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG137:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: 
[[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label 
[[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG139]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG138]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG140:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG139]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG142:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG136]] // CHECK-DEBUG: 
omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG136]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG138]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG138]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG136]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG143:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git a/clang/test/OpenMP/irbuilder_safelen.cpp b/clang/test/OpenMP/irbuilder_safelen.cpp --- a/clang/test/OpenMP/irbuilder_safelen.cpp +++ b/clang/test/OpenMP/irbuilder_safelen.cpp @@ -128,6 +128,7 @@ //. 
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"{{.*}}clang version {{.*}}"} // CHECK: !3 = distinct !{!3, !4, !5} // CHECK: !4 = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK: !5 = !{!"llvm.loop.vectorize.width", i32 3} diff --git a/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp --- a/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp +++ b/clang/test/OpenMP/irbuilder_safelen_order_concurrent.cpp @@ -128,6 +128,7 @@ //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !2 = !{!"{{.*}}clang version {{.*}}"} // CHECK: !3 = distinct !{} // CHECK: !4 = distinct !{!4, !5, !6, !7} // CHECK: !5 = !{!"llvm.loop.parallel_accesses", !3} diff --git a/clang/test/OpenMP/irbuilder_simd_aligned.cpp b/clang/test/OpenMP/irbuilder_simd_aligned.cpp --- a/clang/test/OpenMP/irbuilder_simd_aligned.cpp +++ b/clang/test/OpenMP/irbuilder_simd_aligned.cpp @@ -168,6 +168,7 @@ //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 50} +// CHECK: !2 = !{!"{{.*}}clang version {{.*}}"} // CHECK: !3 = distinct !{!3, !4} // CHECK: !4 = !{!"llvm.loop.mustprogress"} // CHECK: !5 = distinct !{} diff --git a/clang/test/OpenMP/irbuilder_simdlen.cpp b/clang/test/OpenMP/irbuilder_simdlen.cpp --- a/clang/test/OpenMP/irbuilder_simdlen.cpp +++ b/clang/test/OpenMP/irbuilder_simdlen.cpp @@ -128,6 +128,7 @@ //.
// CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"{{.*}}clang version {{.*}}"} // CHECK: !3 = distinct !{} // CHECK: !4 = distinct !{!4, !5, !6, !7} // CHECK: !5 = !{!"llvm.loop.parallel_accesses", !3} diff --git a/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp b/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp --- a/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp +++ b/clang/test/OpenMP/irbuilder_simdlen_safelen.cpp @@ -128,6 +128,7 @@ //. // CHECK: !0 = !{i32 1, !"wchar_size", i32 4} // CHECK: !1 = !{i32 7, !"openmp", i32 45} +// CHECK: !2 = !{!"{{.*}}clang version {{.*}}"} // CHECK: !3 = distinct !{!3, !4, !5} // CHECK: !4 = !{!"llvm.loop.vectorize.enable", i1 true} // CHECK: !5 = !{!"llvm.loop.vectorize.width", i32 2} diff --git a/clang/test/OpenMP/irbuilder_unroll_full.c b/clang/test/OpenMP/irbuilder_unroll_full.c --- a/clang/test/OpenMP/irbuilder_unroll_full.c +++ b/clang/test/OpenMP/irbuilder_unroll_full.c @@ -5,77 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_full( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0,
ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// 
CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_full(float *a, float *b, float *c, float *d) { #pragma omp unroll full for (int i = 0; i < 2; i++) { @@ -85,72 +14,132 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr 
%[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: 
%[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.full"} +// CHECK-LABEL: define {{[^@]+}}@unroll_full +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], 
align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], 
i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr 
[[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_heuristic.c @@ -5,77 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 
-// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_heuristic(float *a, float *b, float *c, float *d) { #pragma omp unroll @@ -86,71 +15,132 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: 
%[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 128, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr 
%[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} +// CHECK-LABEL: define {{[^@]+}}@unroll_heuristic +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: 
store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: 
[[IDXPROM4:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 128, 
ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor.c @@ -5,77 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void 
@__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: 
%[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_factor(float *a, float *b, float *c, float *d) { #pragma omp unroll partial(3) for (int i = 0; i < 2; i++) { @@ -85,72 +14,132 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, ptr 
%[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], 
i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 3} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_factor +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr 
[[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// 
CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c @@ -5,141 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], ptr %[[N_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: 
%[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store ptr %[[N_ADDR]], ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 13 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 0) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// 
CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 13 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 13, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP17]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP18]], i64 
%[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP21:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP20]], %[[TMP23]] -// CHECK-NEXT: %[[TMP24:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP24]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP26]] -// CHECK-NEXT: %[[TMP27:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP27]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void 
@__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_for(int n, float *a, float *b, float *c, float *d) { #pragma omp for @@ -151,75 +16,189 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load ptr, ptr %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: 
%[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP13:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP13]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: 
%[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 13} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_for +// CHECK-SAME: (i32 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[DOTCOUNT]], 13 +// CHECK-NEXT: [[TMP5:%.*]] = urem i32 [[DOTCOUNT]], 13 +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP4]], [[TMP7]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: 
omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP12]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 13 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP15]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP16:%.*]] = mul nuw i32 13, [[TMP13]] +// CHECK-NEXT: [[TMP17:%.*]] = add nuw i32 [[TMP16]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP17]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK-NEXT: [[TMP24:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP26]] +// CHECK-NEXT: [[TMP27:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// 
CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSTEP]], align 
4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP11]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP12]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c --- 
a/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_factor_for_collapse.c @@ -5,181 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for_collapse( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[M:.+]], ptr %[[M_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load i32, ptr %[[M_ADDR]], align 4 -// CHECK-NEXT: store i32 %[[TMP0]], ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[J]], align 4 -// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: 
%[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0 -// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1 -// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64 -// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 2 -// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1 -// CHECK-NEXT: store i64 %[[SUB3]], ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[DOTUNROLLED_IV_J]], align 4 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_THEN]]: -// CHECK-NEXT: store i64 0, ptr %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i64 %[[TMP3]], ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: store i64 1, ptr %[[DOTOMP_STRIDE]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK-NEXT: %[[TMP4:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP5:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: %[[TMP7:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ] -// CHECK-NEXT: store i64 %[[COND]], ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP8:.+]] = load i64, ptr %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: store i64 %[[TMP8]], ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_COND]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]: -// CHECK-NEXT: %[[TMP11:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 2 -// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]] -// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32 -// CHECK-NEXT: store i32 %[[CONV14]], ptr %[[I6]], align 4 -// CHECK-NEXT: %[[TMP12:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 2 -// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 2 -// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]] -// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 4 -// CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]] -// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32 -// CHECK-NEXT: store i32 %[[CONV20]], ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: store i32 %[[TMP14]], ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_COND]]: -// CHECK-NEXT: %[[TMP15:.+]] = load 
i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4 -// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] -// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_RHS]]: -// CHECK-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 -// CHECK-NEXT: br label %[[LAND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_END]]: -// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ] -// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_BODY]]: -// CHECK-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1 -// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]] -// CHECK-NEXT: store i32 %[[ADD27]], ptr %[[J]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP23:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM28:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX29:.+]] = getelementptr inbounds float, ptr %[[TMP23]], i64 %[[IDXPROM28]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, ptr %[[ARRAYIDX29]], align 4 -// CHECK-NEXT: %[[TMP26:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, ptr %[[J]], align 4 -// CHECK-NEXT: %[[IDXPROM30:.+]] = sext i32 %[[TMP27]] to i64 
-// CHECK-NEXT: %[[ARRAYIDX31:.+]] = getelementptr inbounds float, ptr %[[TMP26]], i64 %[[IDXPROM30]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, ptr %[[ARRAYIDX31]], align 4 -// CHECK-NEXT: %[[MUL32:.+]] = fmul float %[[TMP25]], %[[TMP28]] -// CHECK-NEXT: %[[ADD33:.+]] = fadd float %[[TMP22]], %[[MUL32]] -// CHECK-NEXT: %[[TMP29:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP30:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM34:.+]] = sext i32 %[[TMP30]] to i64 -// CHECK-NEXT: %[[ARRAYIDX35:.+]] = getelementptr inbounds float, ptr %[[TMP29]], i64 %[[IDXPROM34]] -// CHECK-NEXT: %[[TMP31:.+]] = load float, ptr %[[ARRAYIDX35]], align 4 -// CHECK-NEXT: %[[ADD36:.+]] = fadd float %[[TMP31]], %[[ADD33]] -// CHECK-NEXT: store float %[[ADD36]], ptr %[[ARRAYIDX35]], align 4 -// CHECK-NEXT: br label %[[FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_INC]]: -// CHECK-NEXT: %[[TMP32:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP32]], 1 -// CHECK-NEXT: store i32 %[[INC]], ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_END]]: -// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_BODY_CONTINUE]]: -// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_INC]]: -// CHECK-NEXT: %[[TMP33:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[ADD37:.+]] = add nsw i64 %[[TMP33]], 1 -// CHECK-NEXT: store i64 %[[ADD37]], ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_END]]: -// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(ptr @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 
%[[OMP_GLOBAL_THREAD_NUM38]]) -// CHECK-NEXT: br label %[[OMP_PRECOND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(ptr @7) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]]) -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) { #pragma omp for collapse(2) for (int i = 0; i < m; i++) { @@ -192,9 +17,162 @@ #endif // HEADER -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_factor_for_collapse +// CHECK-SAME: (i32 noundef [[M:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLLED_IV_J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLLED_IV_J7:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLL_INNER_IV_J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], 2 +// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTUNROLLED_IV_J]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK: omp.precond.then: +// CHECK-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK-NEXT: call void 
@__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP6]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[CMP10:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[DIV12:%.*]] = sdiv i64 [[TMP11]], 2 +// CHECK-NEXT: [[MUL13:%.*]] = mul nsw i64 [[DIV12]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL13]] +// CHECK-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK-NEXT: store i32 [[CONV14]], ptr [[I6]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[DIV15:%.*]] = sdiv 
i64 [[TMP13]], 2 +// CHECK-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 2 +// CHECK-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP12]], [[MUL16]] +// CHECK-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 4 +// CHECK-NEXT: [[ADD19:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD19]] to i32 +// CHECK-NEXT: store i32 [[CONV20]], ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: store i32 [[TMP14]], ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP16]], 4 +// CHECK-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP15]], [[ADD21]] +// CHECK-NEXT: br i1 [[CMP22]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] +// CHECK: land.rhs: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[CMP24:%.*]] = icmp slt i32 [[TMP17]], 8 +// CHECK-NEXT: br label [[LAND_END]] +// CHECK: land.end: +// CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP24]], [[LAND_RHS]] ] +// CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[MUL26:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 0, [[MUL26]] +// CHECK-NEXT: store i32 [[ADD27]], ptr [[J]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM28:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM28]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX29]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[J]], align 4 +// CHECK-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM30]] +// CHECK-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX31]], align 4 +// CHECK-NEXT: [[MUL32:%.*]] = fmul float [[TMP25]], [[TMP28]] +// CHECK-NEXT: [[ADD33:%.*]] = fadd float [[TMP22]], [[MUL32]] +// CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM34]] +// CHECK-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX35]], align 4 +// CHECK-NEXT: [[ADD36:%.*]] = fadd float [[TMP31]], [[ADD33]] +// CHECK-NEXT: store float [[ADD36]], ptr [[ARRAYIDX35]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK-NEXT: store i64 
[[ADD37]], ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM38]]) +// CHECK-NEXT: br label [[OMP_PRECOND_END]] +// CHECK: omp.precond.end: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB6:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]) +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic.c @@ -5,77 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 
8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_HEADER]]: -// CHECK-NEXT: %[[OMP_LOOP_IV:.+]] = phi i32 [ 0, %[[OMP_LOOP_PREHEADER]] ], [ %[[OMP_LOOP_NEXT:.+]], %[[OMP_LOOP_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_LOOP_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_COND]]: -// CHECK-NEXT: %[[OMP_LOOP_CMP:.+]] = icmp ult i32 %[[OMP_LOOP_IV]], %[[DOTCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_LOOP_CMP]], label %[[OMP_LOOP_BODY:.+]], label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[OMP_LOOP_IV]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP3:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP4]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP3]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP5:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP6:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP7]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP6]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP8:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP5]], %[[TMP8]] -// CHECK-NEXT: %[[TMP9:.+]] = load ptr, ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP10]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP9]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP11:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP11]] -// CHECK-NEXT: %[[TMP12:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP13]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP12]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_INC]]: -// CHECK-NEXT: %[[OMP_LOOP_NEXT]] = add nuw i32 %[[OMP_LOOP_IV]], 1 -// CHECK-NEXT: br label %[[OMP_LOOP_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic(float *a, float *b, float *c, float *d) { #pragma omp unroll partial for (int i = 0; i < 2; i++) { @@ -85,71 +14,132 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, 
ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr 
%[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: br label [[OMP_LOOP_HEADER:%.*]] +// CHECK: omp_loop.header: +// CHECK-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_LOOP_COND:%.*]] +// CHECK: omp_loop.cond: +// CHECK-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]] +// CHECK-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// 
CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP11]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_INC]] +// CHECK: omp_loop.inc: +// CHECK-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1 +// CHECK-NEXT: br label [[OMP_LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_loop.exit: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store 
i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c @@ -11,159 +11,6 @@ double sind(double); -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_constant_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[E:.+]], ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 0) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: 
%[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 4, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP16]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// 
CHECK-NEXT: %[[TMP18:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP19]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double noundef %[[CONV]]) -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP22]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP23:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, ptr %[[TMP23]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, ptr %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP25]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP26:.+]] = load ptr, ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, ptr %[[TMP26]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, ptr %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP28]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: 
%[[CONV13:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP30:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP31:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP31]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, ptr %[[TMP30]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP32:.+]] = load float, ptr %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP32]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: %[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], ptr %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_constant_for(float *a, 
float *b, float *c, float *d, float *e, float offset) { #pragma omp for @@ -175,72 +22,204 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 128, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// 
CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_constant_for +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef 
[[D:%.*]], ptr noundef [[E:%.*]], float noundef [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 +// CHECK-NEXT: store float [[OFFSET]], ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP3:%.*]] = udiv i32 
[[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP4:%.*]] = urem i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP3]], [[TMP6]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], [[TMP8]] +// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP11]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 4 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label 
[[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP15:%.*]] = mul nuw i32 4, [[TMP12]] +// CHECK-NEXT: [[TMP16:%.*]] = add nuw i32 [[TMP15]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP16]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP19]] to double +// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double noundef [[CONV]]) +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[CONV4:%.*]] = fpext float [[TMP22]] to double +// CHECK-NEXT: [[MUL:%.*]] = fmul double [[CALL]], [[CONV4]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr 
[[TMP23]], i64 [[IDXPROM5]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK-NEXT: [[CONV7:%.*]] = fpext float [[TMP25]] to double +// CHECK-NEXT: [[MUL8:%.*]] = fmul double [[MUL]], [[CONV7]] +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 +// CHECK-NEXT: [[CONV11:%.*]] = fpext float [[TMP28]] to double +// CHECK-NEXT: [[MUL12:%.*]] = fmul double [[MUL8]], [[CONV11]] +// CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[CONV13:%.*]] = fpext float [[TMP29]] to double +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL12]], [[CONV13]] +// CHECK-NEXT: [[TMP30:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, ptr [[TMP30]], i64 [[IDXPROM14]] +// CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[ARRAYIDX15]], align 4 +// CHECK-NEXT: [[CONV16:%.*]] = fpext float [[TMP32]] to double +// CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], [[ADD]] +// CHECK-NEXT: [[CONV18:%.*]] = fptrunc double [[ADD17]] to float +// CHECK-NEXT: store float [[CONV18]], ptr [[ARRAYIDX15]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 
1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 128, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr 
[[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_for_collapse.c @@ -9,201 +9,6 @@ double sind(double); -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8 -// CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[M:.+]], ptr %[[M_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr 
%[[D_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[E:.+]], ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = load i32, ptr %[[M_ADDR]], align 4 -// CHECK-NEXT: store i32 %[[TMP0]], ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[J]], align 4 -// CHECK-NEXT: %[[TMP1_1:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0 -// CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1 -// CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64 -// CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 4 -// CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1 -// CHECK-NEXT: store i64 %[[SUB3]], ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[DOTUNROLLED_IV_J]], align 4 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_THEN]]: -// CHECK-NEXT: store i64 0, ptr %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: store i64 %[[TMP3]], ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: store i64 1, ptr %[[DOTOMP_STRIDE]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @3) -// CHECK-NEXT: call void @__kmpc_for_static_init_8(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK-NEXT: %[[TMP4:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP5:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]] -// 
CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i64, ptr %[[DOTCAPTURE_EXPR_2]], align 8 -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: %[[TMP7:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ] -// CHECK-NEXT: store i64 %[[COND]], ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[TMP8:.+]] = load i64, ptr %[[DOTOMP_LB]], align 8 -// CHECK-NEXT: store i64 %[[TMP8]], ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_COND]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP10:.+]] = load i64, ptr %[[DOTOMP_UB]], align 8 -// CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_BODY]]: -// CHECK-NEXT: %[[TMP11:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 4 -// CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]] -// CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32 -// CHECK-NEXT: store i32 %[[CONV14]], ptr %[[I6]], align 4 -// CHECK-NEXT: %[[TMP12:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[TMP13:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 4 -// CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 4 -// CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]] -// CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 2 -// CHECK-NEXT: %[[ADD19:.+]] = add nsw 
i64 0, %[[MUL18]] -// CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32 -// CHECK-NEXT: store i32 %[[CONV20]], ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load i32, ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: store i32 %[[TMP14]], ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_COND]]: -// CHECK-NEXT: %[[TMP15:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[TMP16:.+]] = load i32, ptr %[[DOTUNROLLED_IV_J7]], align 4 -// CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 2 -// CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]] -// CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_RHS]]: -// CHECK-NEXT: %[[TMP17:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8 -// CHECK-NEXT: br label %[[LAND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[LAND_END]]: -// CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ] -// CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_BODY]]: -// CHECK-NEXT: %[[TMP19:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1 -// CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]] -// CHECK-NEXT: store i32 %[[ADD27]], ptr %[[J]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV28:.+]] = fpext float %[[TMP22]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double 
noundef %[[CONV28]]) -// CHECK-NEXT: %[[TMP23:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM29:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX30:.+]] = getelementptr inbounds float, ptr %[[TMP23]], i64 %[[IDXPROM29]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, ptr %[[ARRAYIDX30]], align 4 -// CHECK-NEXT: %[[CONV31:.+]] = fpext float %[[TMP25]] to double -// CHECK-NEXT: %[[MUL32:.+]] = fmul double %[[CALL]], %[[CONV31]] -// CHECK-NEXT: %[[TMP26:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM33:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX34:.+]] = getelementptr inbounds float, ptr %[[TMP26]], i64 %[[IDXPROM33]] -// CHECK-NEXT: %[[TMP28:.+]] = load float, ptr %[[ARRAYIDX34]], align 4 -// CHECK-NEXT: %[[CONV35:.+]] = fpext float %[[TMP28]] to double -// CHECK-NEXT: %[[MUL36:.+]] = fmul double %[[MUL32]], %[[CONV35]] -// CHECK-NEXT: %[[TMP29:.+]] = load ptr, ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP30:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM37:.+]] = sext i32 %[[TMP30]] to i64 -// CHECK-NEXT: %[[ARRAYIDX38:.+]] = getelementptr inbounds float, ptr %[[TMP29]], i64 %[[IDXPROM37]] -// CHECK-NEXT: %[[TMP31:.+]] = load float, ptr %[[ARRAYIDX38]], align 4 -// CHECK-NEXT: %[[CONV39:.+]] = fpext float %[[TMP31]] to double -// CHECK-NEXT: %[[MUL40:.+]] = fmul double %[[MUL36]], %[[CONV39]] -// CHECK-NEXT: %[[TMP32:.+]] = load float, ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV41:.+]] = fpext float %[[TMP32]] to double -// CHECK-NEXT: %[[ADD42:.+]] = fadd double %[[MUL40]], %[[CONV41]] -// CHECK-NEXT: %[[TMP33:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP34:.+]] = load i32, ptr %[[I6]], align 4 -// CHECK-NEXT: %[[IDXPROM43:.+]] = sext i32 %[[TMP34]] to i64 -// CHECK-NEXT: %[[ARRAYIDX44:.+]] = getelementptr inbounds float, ptr 
%[[TMP33]], i64 %[[IDXPROM43]] -// CHECK-NEXT: %[[TMP35:.+]] = load float, ptr %[[ARRAYIDX44]], align 4 -// CHECK-NEXT: %[[CONV45:.+]] = fpext float %[[TMP35]] to double -// CHECK-NEXT: %[[ADD46:.+]] = fadd double %[[CONV45]], %[[ADD42]] -// CHECK-NEXT: %[[CONV47:.+]] = fptrunc double %[[ADD46]] to float -// CHECK-NEXT: store float %[[CONV47]], ptr %[[ARRAYIDX44]], align 4 -// CHECK-NEXT: br label %[[FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_INC]]: -// CHECK-NEXT: %[[TMP36:.+]] = load i32, ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP36]], 1 -// CHECK-NEXT: store i32 %[[INC]], ptr %[[DOTUNROLL_INNER_IV_J]], align 4 -// CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[FOR_END]]: -// CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_BODY_CONTINUE]]: -// CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_INC]]: -// CHECK-NEXT: %[[TMP37:.+]] = load i64, ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: %[[ADD48:.+]] = add nsw i64 %[[TMP37]], 1 -// CHECK-NEXT: store i64 %[[ADD48]], ptr %[[DOTOMP_IV]], align 8 -// CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_INNER_FOR_END]]: -// CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_EXIT]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM49:.+]] = call i32 @__kmpc_global_thread_num(ptr @5) -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM49]]) -// CHECK-NEXT: br label %[[OMP_PRECOND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_PRECOND_END]]: -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM50:.+]] = call i32 @__kmpc_global_thread_num(ptr @7) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @6, i32 %[[OMP_GLOBAL_THREAD_NUM50]]) -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_for(int m, float *a, float *b, float *c, float *d, float *e, 
float offset) { @@ -217,11 +22,183 @@ } #endif // HEADER -// -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_for +// CHECK-SAME: (i32 noundef [[M:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], float noundef [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLLED_IV_J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLLED_IV_J7:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTUNROLL_INNER_IV_J:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[M]], ptr 
[[M_ADDR]], align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 +// CHECK-NEXT: store float [[OFFSET]], ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: store i32 0, ptr [[J]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], 4 +// CHECK-NEXT: [[SUB3:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: store i32 0, ptr [[DOTUNROLLED_IV_J]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK: omp.precond.then: +// CHECK-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK-NEXT: [[TMP4:%.*]] = 
load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i64 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i64 [ [[TMP6]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK-NEXT: [[CMP10:%.*]] = icmp sle i64 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[DIV12:%.*]] = sdiv i64 [[TMP11]], 4 +// CHECK-NEXT: [[MUL13:%.*]] = mul nsw i64 [[DIV12]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL13]] +// CHECK-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK-NEXT: store i32 [[CONV14]], ptr [[I6]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP13]], 4 +// CHECK-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 4 +// CHECK-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP12]], [[MUL16]] +// CHECK-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 2 +// CHECK-NEXT: 
[[ADD19:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK-NEXT: [[CONV20:%.*]] = trunc i64 [[ADD19]] to i32 +// CHECK-NEXT: store i32 [[CONV20]], ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: store i32 [[TMP14]], ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label [[FOR_COND:%.*]] +// CHECK: for.cond: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTUNROLLED_IV_J7]], align 4 +// CHECK-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP16]], 2 +// CHECK-NEXT: [[CMP22:%.*]] = icmp slt i32 [[TMP15]], [[ADD21]] +// CHECK-NEXT: br i1 [[CMP22]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] +// CHECK: land.rhs: +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[CMP24:%.*]] = icmp slt i32 [[TMP17]], 8 +// CHECK-NEXT: br label [[LAND_END]] +// CHECK: land.end: +// CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ false, [[FOR_COND]] ], [ [[CMP24]], [[LAND_RHS]] ] +// CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// CHECK: for.body: +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[MUL26:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK-NEXT: [[ADD27:%.*]] = add nsw i32 0, [[MUL26]] +// CHECK-NEXT: store i32 [[ADD27]], ptr [[J]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[CONV28:%.*]] = fpext float [[TMP22]] to double +// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double noundef [[CONV28]]) +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] 
= load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM29:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM29]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX30]], align 4 +// CHECK-NEXT: [[CONV31:%.*]] = fpext float [[TMP25]] to double +// CHECK-NEXT: [[MUL32:%.*]] = fmul double [[CALL]], [[CONV31]] +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM33]] +// CHECK-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX34]], align 4 +// CHECK-NEXT: [[CONV35:%.*]] = fpext float [[TMP28]] to double +// CHECK-NEXT: [[MUL36:%.*]] = fmul double [[MUL32]], [[CONV35]] +// CHECK-NEXT: [[TMP29:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM37:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM37]] +// CHECK-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX38]], align 4 +// CHECK-NEXT: [[CONV39:%.*]] = fpext float [[TMP31]] to double +// CHECK-NEXT: [[MUL40:%.*]] = fmul double [[MUL36]], [[CONV39]] +// CHECK-NEXT: [[TMP32:%.*]] = load float, ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[CONV41:%.*]] = fpext float [[TMP32]] to double +// CHECK-NEXT: [[ADD42:%.*]] = fadd double [[MUL40]], [[CONV41]] +// CHECK-NEXT: [[TMP33:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[I6]], align 4 +// CHECK-NEXT: [[IDXPROM43:%.*]] = sext i32 [[TMP34]] to i64 +// CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[TMP33]], i64 [[IDXPROM43]] +// CHECK-NEXT: [[TMP35:%.*]] = load float, ptr [[ARRAYIDX44]], align 4 +// CHECK-NEXT: [[CONV45:%.*]] = fpext float 
[[TMP35]] to double +// CHECK-NEXT: [[ADD46:%.*]] = fadd double [[CONV45]], [[ADD42]] +// CHECK-NEXT: [[CONV47:%.*]] = fptrunc double [[ADD46]] to float +// CHECK-NEXT: store float [[CONV47]], ptr [[ARRAYIDX44]], align 4 +// CHECK-NEXT: br label [[FOR_INC:%.*]] +// CHECK: for.inc: +// CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP36]], 1 +// CHECK-NEXT: store i32 [[INC]], ptr [[DOTUNROLL_INNER_IV_J]], align 4 +// CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: for.end: +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: [[ADD48:%.*]] = add nsw i64 [[TMP37]], 1 +// CHECK-NEXT: store i64 [[ADD48]], ptr [[DOTOMP_IV]], align 8 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK: omp.loop.exit: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM49:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM49]]) +// CHECK-NEXT: br label [[OMP_PRECOND_END]] +// CHECK: omp.precond.end: +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM50:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB6:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM50]]) +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c --- a/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c +++ b/clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c @@ -9,163 +9,6 @@ double sind(double); -// CHECK-LABEL: define {{.*}}@unroll_partial_heuristic_runtime_for( 
-// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[N_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[E_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[OFFSET_ADDR:.+]] = alloca float, align 4 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store i32 %[[N:.+]], ptr %[[N_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[E:.+]], ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: store float %[[OFFSET:.+]], ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 1 -// CHECK-NEXT: store ptr %[[N_ADDR]], ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP2:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[TMP2]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr 
%[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP4:.+]] = udiv i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP5:.+]] = urem i32 %[[DOTCOUNT]], 4 -// CHECK-NEXT: %[[TMP6:.+]] = icmp ne i32 %[[TMP5]], 0 -// CHECK-NEXT: %[[TMP7:.+]] = zext i1 %[[TMP6]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP4]], %[[TMP7]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP8]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 0) -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP11:.+]] = sub i32 %[[TMP10]], %[[TMP9]] -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[TMP11]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP12]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// 
CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP13:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP9]] -// CHECK-NEXT: %[[TMP14:.+]] = icmp eq i32 %[[TMP13]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP15:.+]] = select i1 %[[TMP14]], i32 %[[TMP5]], i32 4 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP15]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP16:.+]] = mul nuw i32 4, %[[TMP13]] -// CHECK-NEXT: %[[TMP17:.+]] = add nuw i32 %[[TMP16]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP17]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP18:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP19:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP19]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP18]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP20:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[CONV:.+]] = fpext float %[[TMP20]] to double -// CHECK-NEXT: %[[CALL:.+]] = call double @sind(double noundef %[[CONV]]) -// CHECK-NEXT: %[[TMP21:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP22:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP22]] to i64 -// CHECK-NEXT: 
%[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP21]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP23:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[CONV4:.+]] = fpext float %[[TMP23]] to double -// CHECK-NEXT: %[[MUL:.+]] = fmul double %[[CALL]], %[[CONV4]] -// CHECK-NEXT: %[[TMP24:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP25:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM5:.+]] = sext i32 %[[TMP25]] to i64 -// CHECK-NEXT: %[[ARRAYIDX6:.+]] = getelementptr inbounds float, ptr %[[TMP24]], i64 %[[IDXPROM5]] -// CHECK-NEXT: %[[TMP26:.+]] = load float, ptr %[[ARRAYIDX6]], align 4 -// CHECK-NEXT: %[[CONV7:.+]] = fpext float %[[TMP26]] to double -// CHECK-NEXT: %[[MUL8:.+]] = fmul double %[[MUL]], %[[CONV7]] -// CHECK-NEXT: %[[TMP27:.+]] = load ptr, ptr %[[E_ADDR]], align 8 -// CHECK-NEXT: %[[TMP28:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM9:.+]] = sext i32 %[[TMP28]] to i64 -// CHECK-NEXT: %[[ARRAYIDX10:.+]] = getelementptr inbounds float, ptr %[[TMP27]], i64 %[[IDXPROM9]] -// CHECK-NEXT: %[[TMP29:.+]] = load float, ptr %[[ARRAYIDX10]], align 4 -// CHECK-NEXT: %[[CONV11:.+]] = fpext float %[[TMP29]] to double -// CHECK-NEXT: %[[MUL12:.+]] = fmul double %[[MUL8]], %[[CONV11]] -// CHECK-NEXT: %[[TMP30:.+]] = load float, ptr %[[OFFSET_ADDR]], align 4 -// CHECK-NEXT: %[[CONV13:.+]] = fpext float %[[TMP30]] to double -// CHECK-NEXT: %[[ADD:.+]] = fadd double %[[MUL12]], %[[CONV13]] -// CHECK-NEXT: %[[TMP31:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP32:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM14:.+]] = sext i32 %[[TMP32]] to i64 -// CHECK-NEXT: %[[ARRAYIDX15:.+]] = getelementptr inbounds float, ptr %[[TMP31]], i64 %[[IDXPROM14]] -// CHECK-NEXT: %[[TMP33:.+]] = load float, ptr %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: %[[CONV16:.+]] = fpext float %[[TMP33]] to double -// CHECK-NEXT: %[[ADD17:.+]] = fadd double %[[CONV16]], %[[ADD]] -// CHECK-NEXT: 
%[[CONV18:.+]] = fptrunc double %[[ADD17]] to float -// CHECK-NEXT: store float %[[CONV18]], ptr %[[ARRAYIDX15]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM19:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM19]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_heuristic_runtime_for(int n, float *a, float *b, float *c, float *d, float *e, float offset) { #pragma omp for @@ -177,75 +20,211 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 
-// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 1 -// CHECK-NEXT: %[[TMP5:.+]] = load ptr, ptr %[[TMP4]], align 8 -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[TMP5]], align 4 -// CHECK-NEXT: store i32 %[[TMP6]], ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP7]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP9]], %[[TMP10]] -// CHECK-NEXT: %[[TMP11:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP11]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP12:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP12]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP13:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP13]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// 
CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_heuristic_runtime_for +// CHECK-SAME: (i32 noundef [[N:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], float noundef [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[OFFSET_ADDR:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 +// CHECK-NEXT: store float [[OFFSET]], ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP4:%.*]] = udiv i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: [[TMP5:%.*]] = urem i32 [[DOTCOUNT]], 4 +// CHECK-NEXT: 
[[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP4]], [[TMP7]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP8]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], [[TMP9]] +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP12]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP13:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP9]] +// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP5]], i32 4 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] 
= phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP15]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP16:%.*]] = mul nuw i32 4, [[TMP13]] +// CHECK-NEXT: [[TMP17:%.*]] = add nuw i32 [[TMP16]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP17]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK-NEXT: [[CALL:%.*]] = call double @sind(double noundef [[CONV]]) +// CHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[CONV4:%.*]] = fpext float [[TMP23]] to double +// CHECK-NEXT: [[MUL:%.*]] = fmul double [[CALL]], [[CONV4]] +// CHECK-NEXT: [[TMP24:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM5]] +// CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX6]], align 
4 +// CHECK-NEXT: [[CONV7:%.*]] = fpext float [[TMP26]] to double +// CHECK-NEXT: [[MUL8:%.*]] = fmul double [[MUL]], [[CONV7]] +// CHECK-NEXT: [[TMP27:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM9]] +// CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 +// CHECK-NEXT: [[CONV11:%.*]] = fpext float [[TMP29]] to double +// CHECK-NEXT: [[MUL12:%.*]] = fmul double [[MUL8]], [[CONV11]] +// CHECK-NEXT: [[TMP30:%.*]] = load float, ptr [[OFFSET_ADDR]], align 4 +// CHECK-NEXT: [[CONV13:%.*]] = fpext float [[TMP30]] to double +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL12]], [[CONV13]] +// CHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds float, ptr [[TMP31]], i64 [[IDXPROM14]] +// CHECK-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX15]], align 4 +// CHECK-NEXT: [[CONV16:%.*]] = fpext float [[TMP33]] to double +// CHECK-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], [[ADD]] +// CHECK-NEXT: [[CONV18:%.*]] = fptrunc double [[ADD17]] to float +// CHECK-NEXT: store float [[CONV18]], ptr [[ARRAYIDX15]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM19:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM19]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP7]], [[TMP8]] +// 
CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP11]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP12]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP13]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 
[[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c @@ -5,137 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_partial_factor_for( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LASTITER:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_LOWERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_UPPERBOUND:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[P_STRIDE:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr 
%[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 2 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: store i32 0, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = sub i32 %[[OMP_FLOOR0_TRIPCOUNT]], 1 -// CHECK-NEXT: store i32 %[[TMP7]], ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[P_STRIDE]], align 4 -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr %[[P_LASTITER]], ptr %[[P_LOWERBOUND]], ptr %[[P_UPPERBOUND]], ptr %[[P_STRIDE]], i32 1, i32 0) -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[P_LOWERBOUND]], align 4 -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[P_UPPERBOUND]], align 4 -// CHECK-NEXT: %[[TMP10:.+]] = sub i32 %[[TMP9]], %[[TMP8]] -// CHECK-NEXT: %[[TMP11:.+]] = add i32 %[[TMP10]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[TMP11]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// 
CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP12:.+]] = add i32 %[[OMP_FLOOR0_IV]], %[[TMP8]] -// CHECK-NEXT: %[[TMP13:.+]] = icmp eq i32 %[[TMP12]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP14:.+]] = select i1 %[[TMP13]], i32 %[[TMP4]], i32 2 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP14]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP15:.+]] = mul nuw i32 2, %[[TMP12]] -// CHECK-NEXT: %[[TMP16:.+]] = add nuw i32 %[[TMP15]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP16]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP17:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP17]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP22:.+]] = load float, ptr 
%[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP19]], %[[TMP22]] -// CHECK-NEXT: %[[TMP23:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP24:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP24]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP23]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP25:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP25]] -// CHECK-NEXT: %[[TMP26:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP27:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext i32 %[[TMP27]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP26]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 %[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM9:.+]] = call i32 @__kmpc_global_thread_num(ptr @1) -// CHECK-NEXT: call void @__kmpc_barrier(ptr @2, i32 %[[OMP_GLOBAL_THREAD_NUM9]]) -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: 
[[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_partial_factor_for(float *a, float *b, float *c, float *d) { #pragma omp for @@ -147,72 +16,182 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], 
%[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 2} +// CHECK-LABEL: define {{[^@]+}}@unroll_partial_factor_for +// CHECK-SAME: (ptr noundef [[A:%.*]], 
ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[DOTCOUNT]], 2 +// CHECK-NEXT: [[TMP4:%.*]] = urem i32 [[DOTCOUNT]], 2 +// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add 
nuw i32 [[TMP3]], [[TMP6]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[OMP_FLOOR0_TRIPCOUNT]], 1 +// CHECK-NEXT: store i32 [[TMP7]], ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: store i32 1, ptr [[P_STRIDE]], align 4 +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0) +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], [[TMP8]] +// CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[TMP11]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OMP_FLOOR0_IV]], [[TMP8]] +// CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP4]], i32 2 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, [[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// 
CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP15:%.*]] = mul nuw i32 2, [[TMP12]] +// CHECK-NEXT: [[TMP16:%.*]] = add nuw i32 [[TMP15]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP16]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP25]] +// CHECK-NEXT: [[TMP26:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 
[[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM9]]) +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c --- a/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c +++ b/clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c @@ -5,119 +5,6 @@ #ifndef HEADER #define HEADER -// CHECK-LABEL: define {{.*}}@unroll_unroll_partial_heuristic( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[A_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[B_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[C_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[D_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[I:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[AGG_CAPTURED:.+]] = alloca %struct.anon, align 8 -// CHECK-NEXT: %[[AGG_CAPTURED1:.+]] = alloca %struct.anon.0, align 4 -// CHECK-NEXT: %[[DOTCOUNT_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[A:.+]], ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[B:.+]], ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[C:.+]], ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[D:.+]], ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: store i32 0, ptr %[[I]], align 4 -// CHECK-NEXT: %[[TMP0:.+]] = getelementptr inbounds %struct.anon, ptr %[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store ptr %[[I]], ptr %[[TMP0]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[AGG_CAPTURED1]], 
i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: store i32 %[[TMP2]], ptr %[[TMP1]], align 4 -// CHECK-NEXT: call void @__captured_stmt(ptr %[[DOTCOUNT_ADDR]], ptr %[[AGG_CAPTURED]]) -// CHECK-NEXT: %[[DOTCOUNT:.+]] = load i32, ptr %[[DOTCOUNT_ADDR]], align 4 -// CHECK-NEXT: br label %[[OMP_LOOP_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_PREHEADER]]: -// CHECK-NEXT: %[[TMP3:.+]] = udiv i32 %[[DOTCOUNT]], 8 -// CHECK-NEXT: %[[TMP4:.+]] = urem i32 %[[DOTCOUNT]], 8 -// CHECK-NEXT: %[[TMP5:.+]] = icmp ne i32 %[[TMP4]], 0 -// CHECK-NEXT: %[[TMP6:.+]] = zext i1 %[[TMP5]] to i32 -// CHECK-NEXT: %[[OMP_FLOOR0_TRIPCOUNT:.+]] = add nuw i32 %[[TMP3]], %[[TMP6]] -// CHECK-NEXT: br label %[[OMP_FLOOR0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_HEADER]]: -// CHECK-NEXT: %[[OMP_FLOOR0_IV:.+]] = phi i32 [ 0, %[[OMP_FLOOR0_PREHEADER]] ], [ %[[OMP_FLOOR0_NEXT:.+]], %[[OMP_FLOOR0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_FLOOR0_COND:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_COND]]: -// CHECK-NEXT: %[[OMP_FLOOR0_CMP:.+]] = icmp ult i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: br i1 %[[OMP_FLOOR0_CMP]], label %[[OMP_FLOOR0_BODY:.+]], label %[[OMP_FLOOR0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_BODY]]: -// CHECK-NEXT: %[[TMP7:.+]] = icmp eq i32 %[[OMP_FLOOR0_IV]], %[[OMP_FLOOR0_TRIPCOUNT]] -// CHECK-NEXT: %[[TMP8:.+]] = select i1 %[[TMP7]], i32 %[[TMP4]], i32 8 -// CHECK-NEXT: br label %[[OMP_TILE0_PREHEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_PREHEADER]]: -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_HEADER]]: -// CHECK-NEXT: %[[OMP_TILE0_IV:.+]] = phi i32 [ 0, %[[OMP_TILE0_PREHEADER]] ], [ %[[OMP_TILE0_NEXT:.+]], %[[OMP_TILE0_INC:.+]] ] -// CHECK-NEXT: br label %[[OMP_TILE0_COND:.+]] -// 
CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_COND]]: -// CHECK-NEXT: %[[OMP_TILE0_CMP:.+]] = icmp ult i32 %[[OMP_TILE0_IV]], %[[TMP8]] -// CHECK-NEXT: br i1 %[[OMP_TILE0_CMP]], label %[[OMP_TILE0_BODY:.+]], label %[[OMP_TILE0_EXIT:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_BODY]]: -// CHECK-NEXT: %[[TMP9:.+]] = mul nuw i32 8, %[[OMP_FLOOR0_IV]] -// CHECK-NEXT: %[[TMP10:.+]] = add nuw i32 %[[TMP9]], %[[OMP_TILE0_IV]] -// CHECK-NEXT: br label %[[OMP_LOOP_BODY:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_BODY]]: -// CHECK-NEXT: call void @__captured_stmt.1(ptr %[[I]], i32 %[[TMP10]], ptr %[[AGG_CAPTURED1]]) -// CHECK-NEXT: %[[TMP11:.+]] = load ptr, ptr %[[B_ADDR]], align 8 -// CHECK-NEXT: %[[TMP12:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP12]] to i64 -// CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, ptr %[[TMP11]], i64 %[[IDXPROM]] -// CHECK-NEXT: %[[TMP13:.+]] = load float, ptr %[[ARRAYIDX]], align 4 -// CHECK-NEXT: %[[TMP14:.+]] = load ptr, ptr %[[C_ADDR]], align 8 -// CHECK-NEXT: %[[TMP15:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM2:.+]] = sext i32 %[[TMP15]] to i64 -// CHECK-NEXT: %[[ARRAYIDX3:.+]] = getelementptr inbounds float, ptr %[[TMP14]], i64 %[[IDXPROM2]] -// CHECK-NEXT: %[[TMP16:.+]] = load float, ptr %[[ARRAYIDX3]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = fmul float %[[TMP13]], %[[TMP16]] -// CHECK-NEXT: %[[TMP17:.+]] = load ptr, ptr %[[D_ADDR]], align 8 -// CHECK-NEXT: %[[TMP18:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM4:.+]] = sext i32 %[[TMP18]] to i64 -// CHECK-NEXT: %[[ARRAYIDX5:.+]] = getelementptr inbounds float, ptr %[[TMP17]], i64 %[[IDXPROM4]] -// CHECK-NEXT: %[[TMP19:.+]] = load float, ptr %[[ARRAYIDX5]], align 4 -// CHECK-NEXT: %[[MUL6:.+]] = fmul float %[[MUL]], %[[TMP19]] -// CHECK-NEXT: %[[TMP20:.+]] = load ptr, ptr %[[A_ADDR]], align 8 -// CHECK-NEXT: %[[TMP21:.+]] = load i32, ptr %[[I]], align 4 -// CHECK-NEXT: %[[IDXPROM7:.+]] = sext 
i32 %[[TMP21]] to i64 -// CHECK-NEXT: %[[ARRAYIDX8:.+]] = getelementptr inbounds float, ptr %[[TMP20]], i64 %[[IDXPROM7]] -// CHECK-NEXT: store float %[[MUL6]], ptr %[[ARRAYIDX8]], align 4 -// CHECK-NEXT: br label %[[OMP_TILE0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_INC]]: -// CHECK-NEXT: %[[OMP_TILE0_NEXT]] = add nuw i32 %[[OMP_TILE0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_TILE0_HEADER]], !llvm.loop ![[LOOP3:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_TILE0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_TILE0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_INC]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_INC]]: -// CHECK-NEXT: %[[OMP_FLOOR0_NEXT]] = add nuw i32 %[[OMP_FLOOR0_IV]], 1 -// CHECK-NEXT: br label %[[OMP_FLOOR0_HEADER]], !llvm.loop ![[LOOP6:[0-9]+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_EXIT]]: -// CHECK-NEXT: br label %[[OMP_FLOOR0_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_FLOOR0_AFTER]]: -// CHECK-NEXT: br label %[[OMP_LOOP_AFTER:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[OMP_LOOP_AFTER]]: -// CHECK-NEXT: ret void -// CHECK-NEXT: } void unroll_unroll_partial_heuristic(float *a, float *b, float *c, float *d) { #pragma omp unroll partial #pragma omp unroll partial @@ -128,73 +15,164 @@ #endif // HEADER -// CHECK-LABEL: define {{.*}}@__captured_stmt( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[DISTANCE_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[DOTSTART:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTOP:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[DOTSTEP:.+]] = alloca i32, align 4 -// CHECK-NEXT: store ptr %[[DISTANCE:.+]], ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon, ptr %[[TMP0]], i32 0, i32 0 
-// CHECK-NEXT: %[[TMP2:.+]] = load ptr, ptr %[[TMP1]], align 8 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[TMP2]], align 4 -// CHECK-NEXT: store i32 %[[TMP3]], ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: store i32 2, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: store i32 1, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[TMP4:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[TMP5:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 %[[TMP4]], %[[TMP5]] -// CHECK-NEXT: br i1 %[[CMP]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_TRUE]]: -// CHECK-NEXT: %[[TMP6:.+]] = load i32, ptr %[[DOTSTOP]], align 4 -// CHECK-NEXT: %[[TMP7:.+]] = load i32, ptr %[[DOTSTART]], align 4 -// CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP6]], %[[TMP7]] -// CHECK-NEXT: %[[TMP8:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[SUB1:.+]] = sub i32 %[[TMP8]], 1 -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[SUB]], %[[SUB1]] -// CHECK-NEXT: %[[TMP9:.+]] = load i32, ptr %[[DOTSTEP]], align 4 -// CHECK-NEXT: %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP9]] -// CHECK-NEXT: br label %[[COND_END:.+]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_FALSE]]: -// CHECK-NEXT: br label %[[COND_END]] -// CHECK-EMPTY: -// CHECK-NEXT: [[COND_END]]: -// CHECK-NEXT: %[[COND:.+]] = phi i32 [ %[[DIV]], %[[COND_TRUE]] ], [ 0, %[[COND_FALSE]] ] -// CHECK-NEXT: %[[TMP10:.+]] = load ptr, ptr %[[DISTANCE_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[COND]], ptr %[[TMP10]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK-LABEL: define {{.*}}@__captured_stmt.1( -// CHECK-NEXT: [[ENTRY:.*]]: -// CHECK-NEXT: %[[LOOPVAR_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: %[[LOGICAL_ADDR:.+]] = alloca i32, align 4 -// CHECK-NEXT: %[[__CONTEXT_ADDR:.+]] = alloca ptr, align 8 -// CHECK-NEXT: store ptr %[[LOOPVAR:.+]], ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[LOGICAL:.+]], ptr %[[LOGICAL_ADDR]], align 4 -// 
CHECK-NEXT: store ptr %[[__CONTEXT:.+]], ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP0:.+]] = load ptr, ptr %[[__CONTEXT_ADDR]], align 8 -// CHECK-NEXT: %[[TMP1:.+]] = getelementptr inbounds %struct.anon.0, ptr %[[TMP0]], i32 0, i32 0 -// CHECK-NEXT: %[[TMP2:.+]] = load i32, ptr %[[TMP1]], align 4 -// CHECK-NEXT: %[[TMP3:.+]] = load i32, ptr %[[LOGICAL_ADDR]], align 4 -// CHECK-NEXT: %[[MUL:.+]] = mul i32 1, %[[TMP3]] -// CHECK-NEXT: %[[ADD:.+]] = add i32 %[[TMP2]], %[[MUL]] -// CHECK-NEXT: %[[TMP4:.+]] = load ptr, ptr %[[LOOPVAR_ADDR]], align 8 -// CHECK-NEXT: store i32 %[[ADD]], ptr %[[TMP4]], align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT: } -// CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4} -// CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51} -// CHECK: ![[META2:[0-9]+]] = -// CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]} -// CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"} -// CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 8} -// CHECK: ![[LOOP6]] = distinct !{![[LOOP6]], ![[LOOPPROP4]]} +// CHECK-LABEL: define {{[^@]+}}@unroll_unroll_partial_heuristic +// CHECK-SAME: (ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 +// CHECK-NEXT: store ptr [[D]], ptr 
[[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK-NEXT: call void @__captured_stmt(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]) +// CHECK-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4 +// CHECK-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]] +// CHECK: omp_loop.preheader: +// CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[DOTCOUNT]], 8 +// CHECK-NEXT: [[TMP4:%.*]] = urem i32 [[DOTCOUNT]], 8 +// CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +// CHECK-NEXT: [[OMP_FLOOR0_TRIPCOUNT:%.*]] = add nuw i32 [[TMP3]], [[TMP6]] +// CHECK-NEXT: br label [[OMP_FLOOR0_PREHEADER:%.*]] +// CHECK: omp_floor0.preheader: +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER:%.*]] +// CHECK: omp_floor0.header: +// CHECK-NEXT: [[OMP_FLOOR0_IV:%.*]] = phi i32 [ 0, [[OMP_FLOOR0_PREHEADER]] ], [ [[OMP_FLOOR0_NEXT:%.*]], [[OMP_FLOOR0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_FLOOR0_COND:%.*]] +// CHECK: omp_floor0.cond: +// CHECK-NEXT: [[OMP_FLOOR0_CMP:%.*]] = icmp ult i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: br i1 [[OMP_FLOOR0_CMP]], label [[OMP_FLOOR0_BODY:%.*]], label [[OMP_FLOOR0_EXIT:%.*]] +// CHECK: omp_floor0.body: +// CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[OMP_FLOOR0_IV]], [[OMP_FLOOR0_TRIPCOUNT]] +// CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP4]], i32 8 +// CHECK-NEXT: br label [[OMP_TILE0_PREHEADER:%.*]] +// CHECK: omp_tile0.preheader: +// CHECK-NEXT: br label [[OMP_TILE0_HEADER:%.*]] +// CHECK: omp_tile0.header: +// CHECK-NEXT: [[OMP_TILE0_IV:%.*]] = phi i32 [ 0, 
[[OMP_TILE0_PREHEADER]] ], [ [[OMP_TILE0_NEXT:%.*]], [[OMP_TILE0_INC:%.*]] ] +// CHECK-NEXT: br label [[OMP_TILE0_COND:%.*]] +// CHECK: omp_tile0.cond: +// CHECK-NEXT: [[OMP_TILE0_CMP:%.*]] = icmp ult i32 [[OMP_TILE0_IV]], [[TMP8]] +// CHECK-NEXT: br i1 [[OMP_TILE0_CMP]], label [[OMP_TILE0_BODY:%.*]], label [[OMP_TILE0_EXIT:%.*]] +// CHECK: omp_tile0.body: +// CHECK-NEXT: [[TMP9:%.*]] = mul nuw i32 8, [[OMP_FLOOR0_IV]] +// CHECK-NEXT: [[TMP10:%.*]] = add nuw i32 [[TMP9]], [[OMP_TILE0_IV]] +// CHECK-NEXT: br label [[OMP_LOOP_BODY:%.*]] +// CHECK: omp_loop.body: +// CHECK-NEXT: call void @__captured_stmt.1(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED1]]) +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM2]] +// CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP13]], [[TMP16]] +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM4]] +// CHECK-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +// CHECK-NEXT: [[MUL6:%.*]] = fmul float [[MUL]], [[TMP19]] +// CHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: 
[[IDXPROM7:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM7]] +// CHECK-NEXT: store float [[MUL6]], ptr [[ARRAYIDX8]], align 4 +// CHECK-NEXT: br label [[OMP_TILE0_INC]] +// CHECK: omp_tile0.inc: +// CHECK-NEXT: [[OMP_TILE0_NEXT]] = add nuw i32 [[OMP_TILE0_IV]], 1 +// CHECK-NEXT: br label [[OMP_TILE0_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK: omp_tile0.exit: +// CHECK-NEXT: br label [[OMP_TILE0_AFTER:%.*]] +// CHECK: omp_tile0.after: +// CHECK-NEXT: br label [[OMP_FLOOR0_INC]] +// CHECK: omp_floor0.inc: +// CHECK-NEXT: [[OMP_FLOOR0_NEXT]] = add nuw i32 [[OMP_FLOOR0_IV]], 1 +// CHECK-NEXT: br label [[OMP_FLOOR0_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK: omp_floor0.exit: +// CHECK-NEXT: br label [[OMP_FLOOR0_AFTER:%.*]] +// CHECK: omp_floor0.after: +// CHECK-NEXT: br label [[OMP_LOOP_AFTER:%.*]] +// CHECK: omp_loop.after: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[DOTSTART:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4 +// CHECK-NEXT: store i32 2, ptr 
[[DOTSTOP]], align 4 +// CHECK-NEXT: store i32 1, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]] +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4 +// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4 +// CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]] +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ] +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8 +// CHECK-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__captured_stmt.1 +// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]] +// CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]] +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 +// CHECK-NEXT: ret void +// diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,70 
+444,68 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// 
CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP14]], i32 1, ptr [[TMP23]], ptr [[TMP24]], i64 [[TMP27]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = 
call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP23]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP20]], i32 1, ptr [[TMP29]], ptr 
[[TMP30]], i64 [[TMP33]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -577,7 +586,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -588,7 +597,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp --- 
a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: 
define {{[^@]+}}@_ZN1SC1Ev @@ -433,70 +444,68 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 -// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP18]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 
8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP14]], i32 1, ptr [[TMP23]], ptr [[TMP24]], i64 [[TMP27]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 
[[TMP12]]) +// CHECK1-NEXT: [[TMP20:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP20]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP21]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP23]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP20]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP24]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP24]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP21]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], 
ptr [[TMP20]], i32 1, ptr [[TMP29]], ptr [[TMP30]], i64 [[TMP33]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -577,7 +586,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -588,7 +597,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.c b/clang/test/OpenMP/metadirective_device_kind_codegen.c --- 
a/clang/test/OpenMP/metadirective_device_kind_codegen.c +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.c @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple aarch64-unknown-linux -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c -triple ppc64le-unknown-linux -emit-llvm %s -o - | FileCheck %s @@ -7,6 +8,26 @@ void bar(void); +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo(void) { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -35,46 +56,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @foo() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: 
call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @bar -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp --- a/clang/test/OpenMP/metadirective_device_kind_codegen.cpp +++ b/clang/test/OpenMP/metadirective_device_kind_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,26 @@ void bar(); +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB1:[0-9]+]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 4) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.5*)* @.omp_outlined..6 to void (i32*, i32*, ...)*), %struct.anon.5* [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(device = {kind(any)} \ : parallel) @@ -36,46 +57,12 @@ ; } -// CHECK-LABEL: define {{.+}} void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_1:@.+]] to void -// CHECK-NEXT: @__kmpc_push_num_threads -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_1]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( -// CHECK: 
void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: void @_Z3barv() -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void #endif diff --git a/clang/test/OpenMP/metadirective_implementation_codegen.cpp b/clang/test/OpenMP/metadirective_implementation_codegen.cpp --- a/clang/test/OpenMP/metadirective_implementation_codegen.cpp +++ b/clang/test/OpenMP/metadirective_implementation_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple aarch64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple ppc64le-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - -fsanitize-address-use-after-scope | FileCheck %s @@ -8,6 +9,22 @@ void bar(); +// CHECK-LABEL: @_Z3foov( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.1*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), %struct.anon.1* [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.2*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), %struct.anon.2* [[OMP_OUTLINED_ARG_AGG_3]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.3*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), %struct.anon.3* [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.4*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.anon.4* [[OMP_OUTLINED_ARG_AGG_5]]) +// CHECK-NEXT: ret void +// void foo() { #pragma omp metadirective when(implementation = {vendor(score(0) \ : llvm)}, \ @@ -37,40 +54,15 @@ ; } -// CHECK-LABEL: void @_Z3foov() -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_2:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_3:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_4:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_5:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_6:@.+]] to void -// CHECK: @__kmpc_fork_call(%struct.ident_t* {{.+}}, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* [[OUTLINED_7:@.+]] to void -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_2]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_3]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_4]]( -// CHECK: @_Z3barv -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_5]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_6]]( -// CHECK: call void @__kmpc_for_static_init -// CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void -// CHECK: define internal void [[OUTLINED_7]]( // NO-CHECK: call void @__kmpc_for_static_init // NO-CHECK: call void @__kmpc_for_static_fini -// CHECK: ret void 
#endif diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -91,7 +91,7 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4 // CHECK1-NEXT: store double 3.000000e+00, ptr [[B]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR9:[0-9]+]] // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -109,24 +109,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[BAR_B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[BAR_A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP1]] to double // CHECK1-NEXT: store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -140,7 +149,7 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) 
#[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -33,9 +33,11 @@ // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14 // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] @@ -44,16 +46,26 @@ // CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: store i32 10, ptr [[A]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-NEXT: [[TMP3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = 
call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i64 8) // CHECK-NEXT: store i32 100, ptr [[B]], align 4 // CHECK-NEXT: store i32 1000, ptr [[C]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[B]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 // CHECK-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2) +// CHECK-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP8]]) +// CHECK-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP9]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 16) // 
CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4) // CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4) // CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) @@ -63,16 +75,18 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK-NEXT: store i32 1000, ptr [[TMP0]], align 4 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: store i32 1000, ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -86,33 +100,32 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4:[0-9]+]] // CHECK-NEXT: ret void // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[C1:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK-NEXT: store ptr [[C]], ptr [[C1]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 -// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 10000 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK-NEXT: ret void // // @@ -126,12 +139,8 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR4]] +// CHECK-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -32,7 +32,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: 
[[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -49,12 +49,20 @@ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(ptr 
[[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK4-NEXT: ret void // CHECK4: worker.exit: @@ -62,137 +70,146 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[ARGC]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP2]], i64 40, i1 false) // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: 
[[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP22]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to ptr -// CHECK4-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP31]], align 8 -// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP33]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP35]], align 8 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 
-1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7) +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK4-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK4-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP28]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP29]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[ARGC]], ptr [[TMP30]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP32]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK4-NEXT: store ptr [[C]], ptr [[TMP33]], align 8 +// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP34]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 56) +// CHECK4-NEXT: [[TMP36:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP35]]) 
+// CHECK4-NEXT: [[TMP37:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK4-NEXT: store [[STRUCT_ANON_0]] [[TMP37]], ptr [[TMP36]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK4-NEXT: [[TMP39:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP41]], i32 [[TMP39]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP36]]) +// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[TMP35]], i64 56) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: // CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK4-NEXT: store i32 
[[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: br label [[COND_END14:%.*]] -// CHECK4: cond.false13: +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK4: cond.true10: +// CHECK4-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END12:%.*]] +// CHECK4: cond.false11: // CHECK4-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END14]] -// CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END12]] +// CHECK4: cond.end12: +// CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE10]] ], [ [[TMP51]], [[COND_FALSE11]] ] +// CHECK4-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP52]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] @@ -206,27 +223,21 @@ // CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 // CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i64 40, i1 
false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[C]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: -// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 40) +// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[C]], i64 40) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -236,103 +247,107 @@ // CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca 
i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK4-NEXT: [[TMP20:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP20]] to i32 // CHECK4-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP10]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CONV5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK4-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP25]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]] -// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] 
-// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]] -// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]] -// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]] -// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] -// CHECK4-NEXT: store i32 [[ADD20]], ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR8:[0-9]+]] +// CHECK4-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i64 0, i64 [[IDXPROM]] +// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK4-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[C]], i64 0, i64 [[IDXPROM11]] +// CHECK4-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK4-NEXT: 
[[ADD14:%.*]] = add nsw i32 [[ADD10]], [[CALL13]] +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK4-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP14]], i64 0, i64 [[IDXPROM15]] +// CHECK4-NEXT: [[CALL17:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX16]]) #[[ATTR8]] +// CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[ADD14]], [[CALL17]] +// CHECK4-NEXT: store i32 [[ADD18]], ptr [[TMP8]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK4-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], 
align 4 +// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP33]]) +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK4-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP12]], ptr align 4 [[C]], i64 40, i1 false) // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: // CHECK4-NEXT: br label [[OMP_PRECOND_END]] @@ -348,7 +363,7 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -365,12 +380,20 @@ // CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK5: user_code.entry: // CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr 
[[TMP1]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK5-NEXT: ret void // CHECK5: worker.exit: @@ -378,135 +401,144 @@ // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 
-// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK5-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// 
CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[ARGC]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 [[TMP2]], i32 40, i1 false) // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP22]], [[ADD]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to ptr -// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP29]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP31]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP33]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 -// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[ARGC]], ptr [[TMP28]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr 
[[B]], ptr [[TMP30]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[C]], ptr [[TMP31]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[TMP32]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 28) +// CHECK5-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK5-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK5-NEXT: [[TMP37:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP39]], i32 [[TMP37]], i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i32 28) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: // CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], 
[[TMP43]] -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: br label [[COND_END14:%.*]] -// CHECK5: cond.false13: +// CHECK5-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK5: cond.true10: +// CHECK5-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: br label [[COND_END12:%.*]] +// CHECK5: cond.false11: // CHECK5-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END14]] -// CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] -// CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END12]] +// CHECK5: cond.end12: +// CHECK5-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE10]] ], [ [[TMP49]], 
[[COND_FALSE11]] ] +// CHECK5-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP50]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] @@ -520,27 +552,21 @@ // CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP1]], ptr align 4 [[C1]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[C]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 40) +// CHECK5-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 40) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -550,97 +576,101 @@ // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK5-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 [[TMP10]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]] -// 
CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP20]] -// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK5-NEXT: store i32 [[ADD16]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I3]]) #[[ATTR8:[0-9]+]] +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CALL]], [[CALL5]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B]], i32 0, i32 [[TMP27]] +// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD6]], [[CALL7]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[C]], i32 0, i32 [[TMP28]] +// CHECK5-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef 
[[ARRAYIDX9]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD8]], [[CALL10]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP14]], i32 0, i32 [[TMP29]] +// CHECK5-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX12]]) #[[ATTR8]] +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD11]], [[CALL13]] +// CHECK5-NEXT: store i32 [[ADD14]], ptr [[TMP8]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] +// CHECK5-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]]) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK5-NEXT: 
call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP33]]) +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK5-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP12]], ptr align 4 [[C]], i32 40, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: br label [[OMP_PRECOND_END]] diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -122,220 +122,220 @@ // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 
8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = 
getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = 
getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9 // CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP68]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP70]], align 8 -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 
x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9 -// CHECK1-NEXT: store ptr null, ptr [[TMP80]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP83]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10 -// CHECK1-NEXT: store ptr null, ptr [[TMP85]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10 +// CHECK1-NEXT: store ptr null, ptr [[TMP63]], 
align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP88]], align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 11, ptr [[TMP89]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP92]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP93]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP94]], align 8 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP95]], align 8 -// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP96]], align 8 -// CHECK1-NEXT: 
[[TMP97:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0 -// CHECK1-NEXT: br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 11, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP64]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP74]], align 8 +// 
CHECK1-NEXT: [[TMP75:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP76:%.*]] = icmp ne i32 [[TMP75]], 0 +// CHECK1-NEXT: br i1 [[TMP76]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP11]], ptr [[TMP12]], ptr [[TMP13]], ptr [[TMP14]], ptr [[A]], ptr [[TMP15]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[B]], align 8 -// CHECK1-NEXT: store ptr [[TMP99]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[C]], align 8 -// CHECK1-NEXT: store ptr [[TMP100]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP101:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: store ptr [[TMP101]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP102:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP103:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[D]], align 8 -// CHECK1-NEXT: [[TMP105:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[TMP107]], align 8 -// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 -// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr 
[[TMP105]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP114:%.*]] = load ptr, ptr [[TMP113]], align 8 -// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP117:%.*]] = load ptr, ptr [[TMP116]], align 8 -// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP105]], i32 0, i32 4 -// CHECK1-NEXT: [[TMP120:%.*]] = load ptr, ptr [[TMP119]], align 8 -// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP121]], align 8 -// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP123]], align 8 -// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[B]], align 8 +// CHECK1-NEXT: store ptr [[TMP77]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[C]], align 8 +// CHECK1-NEXT: store ptr [[TMP78]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[D]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP85]], align 8 +// 
CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP89:%.*]] = load ptr, ptr [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP94]], align 8 +// CHECK1-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP83]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP98:%.*]] = load ptr, ptr [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP99]], align 8 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP100]], align 8 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP102]], align 8 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP103]], align 8 +// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [11 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP104]], align 8 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP105]], align 8 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP81]], ptr [[TMP106]], align 8 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP107]], align 8 +// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP108]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP82]], ptr [[TMP109]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP110]], align 8 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP111]], align 8 +// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP112]], align 8 +// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP113]], align 8 +// CHECK1-NEXT: [[TMP114:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP114]], align 8 +// CHECK1-NEXT: [[TMP115:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP83]], ptr [[TMP115]], align 8 +// CHECK1-NEXT: [[TMP116:%.*]] 
= getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP116]], align 8 +// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP84]], ptr [[TMP117]], align 8 +// CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP118]], align 8 +// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP119]], align 8 +// CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP120]], align 8 +// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP89]], ptr [[TMP121]], align 8 +// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP122]], align 8 +// CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP123]], align 8 +// CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP124]], align 8 +// CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8 // CHECK1-NEXT: store ptr null, ptr [[TMP125]], align 8 -// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP102]], ptr [[TMP126]], align 8 -// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP102]], ptr 
[[TMP128]], align 8 -// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP130]], align 8 -// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP131]], align 8 -// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP103]], ptr [[TMP133]], align 8 -// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP135]], align 8 -// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP136]], align 8 -// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP104]], ptr [[TMP138]], align 8 -// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8 -// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP141]], align 8 -// CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP143]], align 8 -// CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP145]], align 8 -// CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP146]], align 8 -// CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, 
i32 5 -// CHECK1-NEXT: store ptr [[TMP105]], ptr [[TMP148]], align 8 -// CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP150]], align 8 -// CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP106]], ptr [[TMP151]], align 8 -// CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 6 -// CHECK1-NEXT: store ptr [[TMP108]], ptr [[TMP153]], align 8 -// CHECK1-NEXT: [[TMP155:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP155]], align 8 -// CHECK1-NEXT: [[TMP156:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP109]], ptr [[TMP156]], align 8 -// CHECK1-NEXT: [[TMP158:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 7 -// CHECK1-NEXT: store ptr [[TMP111]], ptr [[TMP158]], align 8 -// CHECK1-NEXT: [[TMP160:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP160]], align 8 -// CHECK1-NEXT: [[TMP161:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP112]], ptr [[TMP161]], align 8 -// CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 8 -// CHECK1-NEXT: store ptr [[TMP114]], ptr [[TMP163]], align 8 -// CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP165]], align 8 -// CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP115]], ptr [[TMP166]], align 8 -// CHECK1-NEXT: [[TMP168:%.*]] = 
getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9 -// CHECK1-NEXT: store ptr [[TMP117]], ptr [[TMP168]], align 8 -// CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9 -// CHECK1-NEXT: store ptr null, ptr [[TMP170]], align 8 -// CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP118]], ptr [[TMP171]], align 8 -// CHECK1-NEXT: [[TMP173:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10 -// CHECK1-NEXT: store ptr [[TMP120]], ptr [[TMP173]], align 8 -// CHECK1-NEXT: [[TMP175:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10 -// CHECK1-NEXT: store ptr null, ptr [[TMP175]], align 8 -// CHECK1-NEXT: [[TMP176:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP177:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP93]], ptr [[TMP126]], align 8 +// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP95]], ptr [[TMP127]], align 8 +// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9 +// CHECK1-NEXT: store ptr null, ptr [[TMP128]], align 8 +// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP96]], ptr [[TMP129]], align 8 +// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 10 +// CHECK1-NEXT: store ptr [[TMP98]], ptr [[TMP130]], align 8 +// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], 
i64 0, i64 10 +// CHECK1-NEXT: store ptr null, ptr [[TMP131]], align 8 +// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP178:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP178]], align 4 -// CHECK1-NEXT: [[TMP179:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK1-NEXT: store i32 11, ptr [[TMP179]], align 4 -// CHECK1-NEXT: [[TMP180:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP176]], ptr [[TMP180]], align 8 -// CHECK1-NEXT: [[TMP181:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP177]], ptr [[TMP181]], align 8 -// CHECK1-NEXT: [[TMP182:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP182]], align 8 -// CHECK1-NEXT: [[TMP183:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP183]], align 8 -// CHECK1-NEXT: [[TMP184:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP184]], align 8 -// CHECK1-NEXT: [[TMP185:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP185]], align 8 -// CHECK1-NEXT: [[TMP186:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP186]], align 8 -// CHECK1-NEXT: [[TMP187:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK1-NEXT: [[TMP188:%.*]] = icmp ne i32 [[TMP187]], 0 -// CHECK1-NEXT: br i1 [[TMP188]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP134]], align 4 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CHECK1-NEXT: store i32 11, ptr [[TMP135]], align 4 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP132]], ptr [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP133]], ptr [[TMP137]], align 8 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP138]], align 8 +// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP139]], align 8 +// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP141]], align 8 +// CHECK1-NEXT: [[TMP142:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP142]], align 8 +// CHECK1-NEXT: [[TMP143:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK1-NEXT: [[TMP144:%.*]] = icmp ne i32 [[TMP143]], 0 +// CHECK1-NEXT: br i1 [[TMP144]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK1: omp_offload.failed11: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP102]], ptr [[TMP103]], ptr [[TMP104]], ptr [[A]], ptr [[TMP105]]) #[[ATTR4]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(ptr [[ARGC_ADDR]], ptr [[TMP80]], ptr [[TMP81]], ptr [[TMP82]], ptr [[A]], ptr [[TMP83]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK1: omp_offload.cont12: -// CHECK1-NEXT: [[TMP189:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: [[TMP145:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN1S3fooEv(ptr noundef nonnull align 4 dereferenceable(4) @s) -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP189]], [[CALL]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP145]], [[CALL]] // CHECK1-NEXT: ret i32 [[ADD]] // // @@ -373,16 +373,16 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP4]], i64 40, i1 false) // CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[B5]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 
[[TMP6]], ptr [[B5]], align 4 // CHECK1-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[C7]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C7]], align 4 // CHECK1-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP11]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP9]]) // CHECK1-NEXT: ret void // // @@ -398,6 +398,7 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -412,69 +413,78 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 6, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], ptr [[TMP3]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK1-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK1-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 -// CHECK1-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 -// CHECK1-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP14]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK1-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK1-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP19]]) // CHECK1-NEXT: ret void // // @@ -504,106 +514,106 @@ // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP3]], 
ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 
-1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27(ptr [[THIS1]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: store ptr [[TMP33]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP34]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 
2 -// CHECK1-NEXT: store ptr [[TMP35]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP36]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP28]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr 
[[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 -// CHECK1-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP43]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP41]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.6, ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.7, ptr [[TMP47]], align 8 +// CHECK1-NEXT: 
[[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP34]]) #[[ATTR4]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(ptr [[THIS1]], ptr [[TMP28]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] // CHECK1: omp_offload.cont8: // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[L]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP66]]) -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP65]], [[CALL]] +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[L]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP54]]) +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP53]], 
[[CALL]] // CHECK1-NEXT: ret i32 [[ADD]] // // @@ -623,8 +633,8 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) // CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -647,38 +657,44 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[TMP0]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK1-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 
@_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK1-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -698,40 +714,40 @@ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr 
[[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.9, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18(ptr [[TMP1]]) #[[ATTR4]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -744,32 +760,37 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T]], ptr 
[[T_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK1-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]]) // CHECK1-NEXT: ret void // @@ -801,11 +822,11 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false) // CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR9:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -831,7 +852,8 @@ // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -842,12 +864,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// 
CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -855,30 +882,31 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], 
align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK2-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK2-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -920,29 +948,29 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4 // CHECK2-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4 // CHECK2-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], 
align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8 // CHECK2-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP12]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP10]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR9]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, 
i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -961,7 +989,8 @@ // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -981,23 +1010,28 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// 
CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 48) +// CHECK2-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP17]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store 
[[STRUCT_ANON_1]] [[TMP19]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP18]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP17]], i64 48) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1005,73 +1039,70 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, 
align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C8:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK2-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, 
ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 -// CHECK2-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 -// CHECK2-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP14]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, 
i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK2-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK2-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 0 +// 
CHECK2-NEXT: store ptr [[ARGC]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP19]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP27]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -1080,7 +1111,8 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 @@ -1089,10 +1121,15 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 8) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1100,25 +1137,27 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[T_ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[T:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK2-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK2-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 
@_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -1160,29 +1199,29 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) // CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[B5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[B5]], align 4 // CHECK3-NEXT: store ptr [[B5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[C7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[C7]], align 4 // CHECK3-NEXT: store ptr [[C7]], ptr [[_TMP8]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = 
load ptr, ptr [[_TMP6]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP8]], align 8 // CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP12]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP20]]) #[[ATTR7:[0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP10]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP18]]) #[[ATTR9:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1201,7 +1240,8 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x ptr], align 8 
+// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -1221,23 +1261,28 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [6 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP21]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i64 6) +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 48) +// CHECK3-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP17]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON]] [[TMP19]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP18]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP17]], i64 48) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr 
@[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1245,73 +1290,70 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK3-NEXT: [[L:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C8:%.*]] = alloca i32, align 
4 -// CHECK3-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A10:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L3]], ptr align 8 [[TMP5]], i64 40, i1 false) -// CHECK3-NEXT: store ptr [[L3]], ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARGC5]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[B6]], align 4 -// CHECK3-NEXT: store ptr [[B6]], ptr [[_TMP7]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[C8]], align 4 -// CHECK3-NEXT: store ptr [[C8]], ptr [[_TMP9]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[A10]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[ARGC5]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP9]], align 8 -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP14]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[A10]], ptr [[TMP21]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 
3 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP11]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP12]], i64 40, i1 false) +// CHECK3-NEXT: store ptr [[L]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARGC]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[B]], align 4 +// CHECK3-NEXT: store ptr [[B]], ptr [[_TMP4]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP5]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC]], ptr [[TMP20]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) 
[[TMP22]]) #[[ATTR7]] +// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP26]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @"_ZZ4mainENK3$_0clEv"(ptr noundef nonnull align 8 dereferenceable(40) [[TMP27]]) #[[ATTR9]] // CHECK3-NEXT: ret void // // @@ -1335,11 +1377,11 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP3]], i64 8, i1 false) // CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR7]] +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP6]]) #[[ATTR9]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // 
CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1365,7 +1407,8 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1376,12 +1419,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr 
@__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON_1]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1389,30 +1437,31 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[L:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L1]], ptr align 8 [[TMP2]], i64 8, i1 false) -// CHECK3-NEXT: store ptr [[L1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[L]], ptr align 8 [[TMP5]], i64 8, i1 false) +// CHECK3-NEXT: store ptr [[L]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP8]]) #[[ATTR9]] // 
CHECK3-NEXT: ret void // // @@ -1421,7 +1470,8 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 @@ -1430,10 +1480,15 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK3-NEXT: store [[STRUCT_ANON_2]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP6]]) +// CHECK3-NEXT: 
call void @__kmpc_free_shared(ptr [[TMP5]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1441,24 +1496,26 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 -// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[T:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T1]], ptr align 8 [[TMP1]], i64 8, i1 false) -// CHECK3-NEXT: store ptr [[T1]], ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) 
#[[ATTR7]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[T]], ptr align 8 [[TMP3]], i64 8, i1 false) +// CHECK3-NEXT: store ptr [[T]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZZN1S3fooEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR9]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp --- a/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_pointer_capturing.cpp @@ -155,20 +155,20 @@ // CHECK: define internal void @{{.+}}omp_loop_ref{{.+}}( // CHECK: [[BODY:%body.addr]] = alloca %class.anon* // CHECK: [[TMP:%tmp]] = alloca %class.anon* -// CHECK: [[BODY_REF:%body_ref]] = alloca %class.anon.1* -// CHECK: [[REF_TMP:%ref.tmp]] = alloca %class.anon.1 -// CHECK: [[TMP8:%tmp.+]] = alloca %class.anon.1* +// CHECK: [[BODY_REF:%body_ref]] = alloca %class.anon.6* +// CHECK: [[REF_TMP:%ref.tmp]] = alloca %class.anon.6 +// CHECK: [[TMP8:%tmp.+]] = alloca %class.anon.6* // CHECK: [[L0:%.+]] = load %class.anon*, %class.anon** [[BODY]] // CHECK: store %class.anon* [[L0]], %class.anon** [[TMP]] // CHECK: [[L5:%.+]] = load %class.anon*, %class.anon** [[TMP]] // CHECK-NOT [[L6:%.+]] = load %class.anon*, %class.anon** [[TMP]] // CHECK-NOT [[L7:%.+]] = load %class.anon*, %class.anon** [[TMP]] -// CHECK: store 
%class.anon.1* [[REF_TMP]], %class.anon.1** [[BODY_REF]] -// CHECK:[[L47:%.+]] = load %class.anon.1*, %class.anon.1** [[BODY_REF]] -// CHECK: store %class.anon.1* [[L47]], %class.anon.1** [[TMP8]] -// CHECK: [[L48:%.+]] = load %class.anon.1*, %class.anon.1** [[TMP8]] -// CHECK-NOT: [[L49:%.+]] = load %class.anon.1*, %class.anon.1** [[TMP8]] -// CHECK-NOT: [[L50:%.+]] = load %class.anon.1*, %class.anon.1** [[TMP8]] +// CHECK: store %class.anon.6* [[REF_TMP]], %class.anon.6** [[BODY_REF]] +// CHECK:[[L47:%.+]] = load %class.anon.6*, %class.anon.6** [[BODY_REF]] +// CHECK: store %class.anon.6* [[L47]], %class.anon.6** [[TMP8]] +// CHECK: [[L48:%.+]] = load %class.anon.6*, %class.anon.6** [[TMP8]] +// CHECK-NOT: [[L49:%.+]] = load %class.anon.6*, %class.anon.6** [[TMP8]] +// CHECK-NOT: [[L50:%.+]] = load %class.anon.6*, %class.anon.6** [[TMP8]] // CHECK: ret void // CHECK: define internal void @{{.+}}xoo{{.+}}( diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -28,13 +28,19 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: call 
void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -42,33 +48,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z3usev // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// 
CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) +// CHECK1-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], i64 1) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -76,18 +91,21 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -96,21 +114,27 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { 
// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -118,33 +142,42 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_Z3usev // CHECK2-SAME: () #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) +// CHECK2-NEXT: [[TMP1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP3]], ptr [[TMP2]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP1]], i32 1) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR7:[0-9]+]] { // 
CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR10]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -152,18 +185,21 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR8]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR10]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -172,7 +208,7 @@ // CHECK2-NEXT: store i16 [[TMP0]], 
ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -37,7 +37,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -45,10 +46,15 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -59,26 +65,34 @@ // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr 
@__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] 
= load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP2]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -92,26 +106,26 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// 
CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP3]]) #[[ATTR9]] // CHECK1-NEXT: ret void // // @@ -123,13 +137,13 @@ // CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR9]] // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR9]] // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: ret void @@ -145,11 +159,9 @@ 
// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -157,7 +169,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -165,10 +178,15 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR9:[0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -179,26 +197,34 @@ // CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr 
@__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP2]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -212,26 +238,26 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP3]]) #[[ATTR9]] // CHECK2-NEXT: ret void // // @@ -243,13 +269,13 @@ // CHECK2-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] +// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR9]] // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR9]] // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK2: atomic_exit: // CHECK2-NEXT: ret void 
@@ -265,10 +291,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR5]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -77,20 +77,38 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // 
CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i64 1) +// CHECK1-NEXT: [[TMP5:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK1-NEXT: store 
[[STRUCT_ANON_0]] [[TMP7]], ptr [[TMP6]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 1) +// CHECK1-NEXT: [[TMP8:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG4]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 1 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -99,13 +117,16 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 42, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -120,19 +141,22 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 43, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -147,19 +171,22 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 44, ptr [[A]], align 4 // CHECK1-NEXT: ret void // @@ -174,8 +201,8 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -186,7 +213,8 @@ 
// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -197,22 +225,27 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP5]], ptr [[TMP4]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1000 +// CHECK1-NEXT: [[TMP7:%.*]] = zext i1 [[CMP]] to i32 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP7]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr 
@__omp_outlined__3_wrapper, ptr [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i64 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -220,17 +253,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 45, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -244,8 +280,8 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -253,7 +289,8 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], 
align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -263,11 +300,16 @@ // CHECK1-NEXT: [[A1:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) @@ -277,42 +319,44 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] // CHECK1: omp.critical.loop: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK1-NEXT: br i1 
[[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK1: omp.critical.test: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK1: omp.critical.test: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK1-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK1: omp.critical.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") // CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK1: omp.critical.sync: -// CHECK1-NEXT: call void 
@__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[CRITICAL_COUNTER]], align 4 // CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK1: omp.critical.exit: // CHECK1-NEXT: ret void @@ -328,11 +372,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -340,20 +382,38 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG2:%.*]] = alloca [[STRUCT_ANON_0]], 
align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG4:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0) -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP4]], ptr [[TMP3]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP2]], i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// 
CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG2]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP7]], ptr [[TMP6]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG4]], ptr [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_3]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_1]] [[TMP10]], ptr [[TMP9]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -362,13 +422,16 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 42, ptr [[A]], align 4 // CHECK2-NEXT: ret void // @@ -383,19 +446,22 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 43, ptr [[A]], align 
4 // CHECK2-NEXT: ret void // @@ -410,19 +476,22 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 44, ptr [[A]], align 4 // CHECK2-NEXT: ret void // @@ -437,8 +506,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// 
CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -449,7 +518,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -460,22 +530,27 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 -// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP5]], ptr [[TMP4]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1000 +// CHECK2-NEXT: [[TMP7:%.*]] = zext i1 [[CMP]] to i32 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP7]], i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP3]], i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -483,17 +558,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: store i32 45, ptr [[A]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // @@ -507,8 +585,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr null) #[[ATTR3]] // CHECK2-NEXT: ret void // // @@ -516,7 +594,8 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 
x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -526,11 +605,16 @@ // CHECK2-NEXT: [[A1:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr @__omp_outlined__4_wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], 
align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) @@ -540,42 +624,44 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask() -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i64 @__kmpc_warp_active_thread_mask() +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK2-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br 
label [[OMP_CRITICAL_LOOP:%.*]] // CHECK2: omp.critical.loop: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK2: omp.critical.test: // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]] -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], [[NVPTX_NUM_THREADS]] +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK2: omp.critical.test: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP4]], [[TMP7]] +// CHECK2-NEXT: br i1 [[TMP8]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] // CHECK2: omp.critical.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[INC]], 
ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP10]], ptr @"_gomp_critical_user_$var") // CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] // CHECK2: omp.critical.sync: -// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]]) -// CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[CRITICAL_COUNTER]], align 4 // CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK2: omp.critical.exit: // CHECK2-NEXT: ret void @@ -591,10 +677,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -37,7 +37,8 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 @@ -49,11 +50,16 @@ // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK-NEXT: store ptr [[D]], ptr [[TMP6]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP6]]) +// CHECK-NEXT: [[TMP8:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP8]], ptr [[TMP7]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[TMP7]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP6]], i64 16) // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP0]], i64 0, i64 3 // CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 @@ -66,12 +72,11 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -81,77 +86,79 @@ // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK: omp.dispatch.cond: -// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK: cond.true: // CHECK-NEXT: br label [[COND_END:%.*]] // CHECK: cond.false: -// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[COND_END]] // CHECK: cond.end: -// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK: omp.dispatch.body: // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK: omp.inner.for.cond: -// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK: omp.inner.for.body: -// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], 
align 4 -// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP12]] +// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], [[TMP15]] // CHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK: omp.body.continue: // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK: omp.inner.for.inc: -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK: omp.inner.for.end: // CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK: omp.dispatch.inc: -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4 -// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 
4 +// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4 // CHECK-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK: omp.dispatch.end: -// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK-NEXT: ret void // // @@ -165,12 +172,8 @@ // CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -149,7 +149,8 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store 
ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 // CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 @@ -158,11 +159,16 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP7]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP6]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i64 16) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -170,27 +176,28 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// 
CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store i32 
[[TMP6]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK1-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -203,7 +210,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[AA:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -212,15 +219,15 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 // 
CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -228,7 +235,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK1-SAME: (i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 @@ -265,41 +272,41 @@ // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.000000e+00 -// CHECK1-NEXT: [[CONV7:%.*]] = fptrunc double [[ADD6]] to float -// CHECK1-NEXT: store float [[CONV7]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP11]] to double -// CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 -// CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], ptr 
[[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX12]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX13]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP12]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP11]] to double +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 +// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float +// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 // CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX15]], i64 3 -// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX16]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK1-NEXT: store double 
[[ADD17]], ptr [[ARRAYIDX16]], align 8 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD18]], ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP15]], 1 +// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 // CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV19:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[CONV19]], 1 -// CHECK1-NEXT: [[CONV21:%.*]] = trunc i32 [[ADD20]] to i8 -// CHECK1-NEXT: store i8 [[CONV21]], ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 +// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 +// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(8) ptr @_ZN2TTIxcEixEi(ptr nonnull align 8 dereferenceable(16) [[TMP7]], i32 0) #[[ATTR10:[0-9]+]] // CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[CALL]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], ptr [[CALL]], align 8 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i64 [[TMP17]], 1 +// CHECK1-NEXT: store i64 [[ADD21]], ptr [[CALL]], align 8 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) 
// CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -307,7 +314,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -319,7 +326,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -338,19 +345,19 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: 
[[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -358,7 +365,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK1-SAME: (ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 @@ -379,24 +386,24 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP5]] to double -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr 
[[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A4]], align 8 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[A3]], align 8 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP7]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 -// CHECK1-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A7]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi double [[TMP8]] to i32 -// CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV8]], ptr nonnull align 8 dereferenceable(8) [[A9]]) #[[ATTR10]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[A6]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[TMP8]] to i32 +// CHECK1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3baziRd(i32 [[CONV7]], ptr nonnull align 8 dereferenceable(8) [[A8]]) #[[ATTR10]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void 
// CHECK1: worker.exit: @@ -404,27 +411,33 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[F]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[TMP5:%.*]] = 
call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 16) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4) // CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK1-SAME: () #[[ATTR4]] { +// CHECK1-SAME: () #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -440,7 +453,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 @@ -457,14 +470,14 @@ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// 
CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -472,30 +485,31 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -504,13 +518,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -519,7 +529,8 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 // CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 @@ -528,11 +539,16 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP7]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP6]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP5]], i32 8) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -540,27 +556,28 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4 +// 
CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39 -// CHECK2-SAME: () #[[ATTR4:[0-9]+]] { +// CHECK2-SAME: () #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -573,7 +590,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47 -// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[AA:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -582,15 +599,15 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: 
[[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 +// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -598,7 +615,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53 -// CHECK2-SAME: (i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 @@ -677,7 +694,7 @@ // // // CHECK2-LABEL: 
define {{[^@]+}}@_ZN2TTIxcEixEi -// CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR7:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 @@ -689,7 +706,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -708,19 +725,19 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], ptr [[AAA_ADDR]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 
[[CONV3]], 1 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -728,7 +745,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108 -// CHECK2-SAME: (ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 @@ -774,27 +791,33 @@ // // // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR7]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[F1]], ptr 
[[F]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[F]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 8) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[F]], align 4 // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4) // CHECK2-NEXT: ret i32 [[TMP7]] // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142 -// CHECK2-SAME: () #[[ATTR4]] { +// CHECK2-SAME: () 
#[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -810,7 +833,7 @@ // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] { +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR6]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 @@ -827,14 +850,14 @@ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -842,30 +865,31 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load double, ptr 
[[TMP5]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP6]] // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -874,12 +898,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -55,7 +55,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] 
{ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -63,9 +64,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -73,20 +79,22 @@ // // // CHECK1-LABEL: define 
{{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -96,7 +104,8 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], 
align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -108,13 +117,18 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -122,32 +136,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -156,7 +170,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 2 dereferenceable(2) 
[[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -164,9 +179,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -174,20 +194,22 @@ // // 
// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -197,7 +219,8 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -209,13 +232,18 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] 
= load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP10]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP8]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -223,32 +251,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git 
a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -50,7 +50,8 @@ // CHECK1-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -58,9 +59,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -68,20 +74,22 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: 
store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -92,7 +100,8 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -106,13 +115,18 @@ // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP9]], i64 24) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -120,32 +134,32 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP4]], align 2 +// 
CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -154,7 +168,8 @@ // CHECK2-SAME: (ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) @@ -162,9 +177,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON]] [[TMP6]], 
ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -172,20 +192,22 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV1:%.*]] = 
trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 +// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // // @@ -196,7 +218,8 @@ // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -210,13 +233,18 @@ // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK2-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP9]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP11]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP10]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP9]], i32 12) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -224,32 +252,32 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store 
i16 [[CONV2]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -33,6 +33,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -42,7 +43,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -50,10 +51,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] 
{ +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -63,9 +65,12 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8) @@ -79,68 +84,73 @@ // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[IB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA8]] -// 
CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR12:[0-9]+]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 
4, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], 4 // CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP20]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP22]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP24]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP13]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP14:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP14]], i64 24) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[IB]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] @@ -165,18 +175,16 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[THIS1]], ptr nonnull align 4 dereferenceable(4) [[TMP0]], ptr nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[THIS1]], ptr nonnull align 4 dereferenceable(4) [[TMP0]], ptr nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -187,29 +195,31 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex", align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 -// 
CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca %"class.std::complex", align 4 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// 
CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IV]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] @@ -220,127 +230,127 @@ // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_LB]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: store 
float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP5]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP5]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, 
!tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP23]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP22]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP24]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA8]] -// 
CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP27]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP13]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP28]] to float +// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP14]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float -// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[REF_TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float -// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR12]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: call void 
@llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP29]] to float +// CHECK1-NEXT: store float [[CONV16]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP13]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP13]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP13]]) #[[ATTR6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP54]]) -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) -// CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 -// CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP38]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP40]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP6]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call 
void @llvm.lifetime.end.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] @@ -363,13 +373,13 @@ // CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] // CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: 
[[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] @@ -378,7 +388,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 @@ -391,50 +401,50 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: 
store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex", ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] 
+// CHECK1-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR6]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8 -// 
CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP46]], ptr align 4 [[TMP43]], i64 8, i1 false), !tbaa.struct !21 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP36]], ptr align 4 [[TMP34]], i64 8, i1 false), !tbaa.struct !21 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -443,7 +453,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -460,48 +470,48 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, 
!tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -510,21 +520,16 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr 
[[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]], ptr [[TMP11]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1]], i8 1, i1 true, i1 true) @@ -534,7 +539,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR6]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // 
CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -542,10 +547,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -555,9 +561,12 @@ // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16) @@ -571,68 +580,73 @@ // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void 
@llvm.lifetime.start.p0(i64 4, ptr [[IB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 
[[TMP5]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr 
nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP2]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP9]], 4 // CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 // CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP20]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP22]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP24]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP11]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP13]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP14:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP14]], i64 24) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[IB]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: 
call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] @@ -657,18 +671,16 @@ // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[THIS1]], ptr nonnull align 8 dereferenceable(8) [[TMP0]], ptr nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR12]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[THIS1]], ptr nonnull align 8 dereferenceable(8) [[TMP0]], ptr nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -679,29 +691,31 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
[[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[REF_TMP5:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca %"class.std::complex.2", align 8 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IV]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] @@ -712,127 +726,127 @@ // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[I]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_LB]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void 
@llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP6]]) #[[ATTR6]] -// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP5]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP5]]) #[[ATTR11]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP5]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP28]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: 
[[ADD9:%.*]] = add i32 [[TMP35]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP23]], 1 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP22]], [[ADD8]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP24]], [[ADD10]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] -// 
CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP27]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[MUL]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr [[REF_TMP13]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP28]] to double +// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP14]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double -// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[REF_TMP16]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double -// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR12]] -// 
CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR12]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP16]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK1-NEXT: store double [[CONV16]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA22]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP13]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]]) #[[ATTR11]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP13]]) #[[ATTR11]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP15]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[REF_TMP14]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[REF_TMP13]]) #[[ATTR6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP54]]) -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func5, ptr @_omp_reduction_inter_warp_copy_func6) -// CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 -// CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP38]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func5, ptr @_omp_reduction_inter_warp_copy_func6) +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i32 [[TMP40]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR12]] -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) +// CHECK1-NEXT: [[CALL20:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr 
nonnull align 8 dereferenceable(16) [[TMP6]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]]) #[[ATTR11]] +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP38]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I7]]) #[[ATTR6]] -// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[PARTIAL_SUM5]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I6]]) #[[ATTR6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr [[PARTIAL_SUM]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_IS_LAST]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_STRIDE]]) #[[ATTR6]] // CHECK1-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[DOTOMP_UB]]) #[[ATTR6]] @@ -855,14 +869,14 @@ // CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR12]] -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]] +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double 
@_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR12]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]] +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA26]] @@ -870,76 +884,76 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.2", align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr 
[[DOTADDR1]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", ptr [[TMP11]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.2", ptr [[TMP9]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: -// CHECK1-NEXT: [[TMP17:%.*]] = phi ptr [ [[TMP11]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] -// CHECK1-NEXT: [[TMP18:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP23]], 7 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 
+// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] +// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = sdiv exact i64 [[TMP16]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP17]], 7 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] // CHECK1: .shuffle.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 -// CHECK1-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) -// CHECK1-NEXT: store i64 [[TMP28]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP29]] = getelementptr i64, ptr [[TMP17]], i64 1 -// CHECK1-NEXT: [[TMP30]] = getelementptr i64, ptr [[TMP18]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP21:%.*]] = trunc i32 [[TMP20]] to i16 +// CHECK1-NEXT: [[TMP22:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP19]], i16 [[TMP6]], i16 [[TMP21]]) +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP23]] = getelementptr i64, ptr [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK1: .shuffle.exit: -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 
[[TMP8]], 1 -// CHECK1-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 -// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] -// CHECK1-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] -// CHECK1-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] -// CHECK1-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP28:%.*]] = and i1 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP30:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP30]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP29]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP34:%.*]] = and i1 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP25]], [[TMP28]] +// CHECK1-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]] +// CHECK1-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR6]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] 
-// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP52]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP54]], ptr align 8 [[TMP51]], i64 16, i1 false), !tbaa.struct !27 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP43]], ptr align 8 [[TMP41]], i64 16, i1 false), !tbaa.struct !27 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -948,7 +962,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -965,48 +979,48 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], 
align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 -// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 4 +// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTADDR1]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4, !tbaa [[TBAA8]] -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: ifcont4: -// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label 
[[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 @@ -1015,15 +1029,9 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP6]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]], ptr [[TMP8]], ptr [[TMP11]]) #[[ATTR6]] +// CHECK1-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -1080,11 +1088,11 @@ // CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 
0, i32 0 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA26]] @@ -1097,7 +1105,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: ret double [[TMP0]] // @@ -1108,7 +1116,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.2", ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK1-NEXT: ret double [[TMP0]] // diff --git 
a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c --- a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -40,6 +40,18 @@ } } } +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckNoArgs_l25 +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) +// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK: user_code.entry: +// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str1, ptr null, i32 0) +// CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) +// CHECK-NEXT: ret void +// CHECK: worker.exit: +// CHECK-NEXT: ret void // // // @@ -60,7 +72,7 @@ // CHECK-64-NEXT: store i64 2, ptr [[TMP3]], align 8 // CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-64-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 -// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) +// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) // CHECK-64-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-64-NEXT: ret void // CHECK-64: worker.exit: @@ -97,7 +109,7 @@ // CHECK-64: if.then: // CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS_0]], ptr [[TMP]], i32 0, i32 0 // CHECK-64-NEXT: store i32 42, ptr [[TMP2]], align 4 -// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) +// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) // CHECK-64-NEXT: br label [[IF_END]] // CHECK-64: worker.exit: // CHECK-64-NEXT: ret void @@ -108,7 +120,6 
@@ // // // -// // CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_CheckSimple_l13 // CHECK-32-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-32-NEXT: entry: @@ -126,7 +137,7 @@ // CHECK-32-NEXT: store i64 2, ptr [[TMP3]], align 8 // CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PRINTF_ARGS]], ptr [[TMP]], i32 0, i32 2 // CHECK-32-NEXT: store double 3.000000e+00, ptr [[TMP4]], align 8 -// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) +// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__llvm_omp_vprintf(ptr [[TMP1]], ptr [[TMP]], i32 24) // CHECK-32-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK-32-NEXT: ret void // CHECK-32: worker.exit: @@ -163,7 +174,7 @@ // CHECK-32: if.then: // CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PRINTF_ARGS_0]], ptr [[TMP]], i32 0, i32 0 // CHECK-32-NEXT: store i32 42, ptr [[TMP2]], align 4 -// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) +// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__llvm_omp_vprintf(ptr @.str2, ptr [[TMP]], i32 4) // CHECK-32-NEXT: br label [[IF_END]] // CHECK-32: worker.exit: // CHECK-32-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -53,7 +53,7 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -62,12 +62,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[A_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP3]], ptr [[TMP2]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -75,15 +75,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i8 49, ptr [[A_ADDR]], align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 +// CHECK1-NEXT: store i8 49, ptr [[A]], align 1 // CHECK1-NEXT: ret void // // @@ -91,7 +96,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -100,12 +105,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void 
@__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -113,15 +118,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i16 1, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: store i16 1, ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -129,7 +139,7 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK1-NEXT: 
[[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 @@ -138,12 +148,12 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -151,53 +161,74 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = 
alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[AA]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i16 1, ptr [[TMP0]], align 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i16 1, ptr [[TMP2]], align 2 // CHECK1-NEXT: ret void // // @@ -205,7 +236,7 @@ // CHECK2-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -214,12 +245,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1 -// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[A_ADDR]], align 1 +// CHECK2-NEXT: store i8 [[TMP3]], ptr [[TMP2]], align 1 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr 
[[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -227,15 +258,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i8 49, ptr [[A_ADDR]], align 1 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 +// CHECK2-NEXT: store i8 49, ptr [[A]], align 1 // CHECK2-NEXT: ret void // // @@ -243,7 +279,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -252,12 +288,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -265,15 +301,20 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: 
[[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store i16 1, ptr [[AA_ADDR]], align 2 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK2-NEXT: store i16 1, ptr [[AA]], align 2 // CHECK2-NEXT: ret void // // @@ -281,7 +322,7 @@ // CHECK2-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 2 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 @@ -290,12 +331,12 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// 
CHECK2-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 2 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -303,52 +344,73 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[AA]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, 
align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// 
CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: store i16 1, ptr [[TMP0]], align 2 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i16 1, ptr [[TMP2]], align 2 // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- 
a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -35,6 +35,7 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -44,7 +45,7 @@ // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -52,83 +53,94 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[I:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK1-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[I]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: 
call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP11]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP10]], i64 8) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -142,17 +154,16 @@ // CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -162,7 +173,7 @@ // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -170,83 +181,94 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK2-NEXT: [[I:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[I]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP10]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP12]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr @__omp_outlined__1_wrapper, ptr [[TMP11]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP10]], i32 4) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) // 
CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[I]], i32 4) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -260,10 +282,8 @@ // CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr 
[[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__kmpc_get_shared_variables_aggregate(ptr [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP5]]) #[[ATTR5]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP2]]) #[[ATTR4]] // CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -77,8 +77,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -90,15 +89,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], 
align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -106,13 +107,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -122,129 +123,137 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 
4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], 
ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] 
// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br 
label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP53]], ptr [[L]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -254,15 +263,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -275,109 +284,119 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// 
CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: 
[[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -390,7 +409,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -401,12 +420,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -414,12 +435,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -429,129 +450,139 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store 
ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// 
CHECK1-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP29]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -564,56 +595,64 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2 @@ -621,17 +660,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK1-NEXT: br 
label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -641,6 +680,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -650,9 +690,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -660,106 +702,118 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt 
i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], 
[[TMP24]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 
[[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -769,56 +823,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -827,7 +887,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -838,12 +898,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -851,12 +913,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, 
align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -864,106 +926,116 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr 
[[TMP19]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1 +// CHECK1-NEXT: call void 
@__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 
[[TMP31]], 99 // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK1: cond.true6: // CHECK1-NEXT: br label [[COND_END8:%.*]] // CHECK1: cond.false7: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END8]] // CHECK1: cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, 
align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -976,75 +1048,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], 
align 4 // CHECK1-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: 
omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1053,7 +1133,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1064,12 +1144,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr 
[[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1077,12 +1159,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1095,25 +1177,31 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -1121,116 +1209,120 @@ // CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr 
[[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 
0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK1-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP30]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_11]] [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP34]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP31]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP30]], i64 32) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP35]], [[TMP36]] // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], 
align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP41]], [[TMP42]] // CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] // CHECK1: cond.true18: -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: br label [[COND_END20:%.*]] // CHECK1: cond.false19: -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END20]] // CHECK1: cond.end20: -// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE18]] ], [ [[TMP39]], [[COND_FALSE19]] ] +// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP43]], [[COND_TRUE18]] ], [ [[TMP44]], [[COND_FALSE19]] ] // CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP45]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP42]]) +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP47]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca 
i32, align 4 @@ -1247,21 +1339,29 @@ // CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -1269,90 +1369,90 @@ // CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr 
[[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] // CHECK1-NEXT: [[CONV15:%.*]] = sext i32 [[MUL14]] to i64 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP15]], [[CONV15]] +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i64 [[TMP23]], [[CONV15]] // CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// 
CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 -// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP18]], [[CONV22]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP26]], [[CONV22]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK1-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK1-NEXT: [[MUL28:%.*]] = mul nsw i64 [[DIV23]], [[CONV27]] -// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP17]], [[MUL28]] +// CHECK1-NEXT: [[SUB29:%.*]] = sub nsw i64 [[TMP25]], [[MUL28]] // CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 // CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[J10]], align 4 +// 
CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]] // CHECK1-NEXT: store i32 [[ADD33]], ptr [[ARRAYIDX35]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1364,7 +1464,7 @@ // 
CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1376,13 +1476,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr 
[[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1390,13 +1494,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1406,134 +1509,143 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 
[[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// 
CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_13]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP33]]) +// CHECK1-NEXT: call void 
@__kmpc_free_shared(ptr [[TMP32]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] 
= icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE10]] ], [ [[TMP46]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1546,76 +1658,84 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: 
store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr 
[[TMP0]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr 
@[[GLOB2]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1627,8 +1747,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1640,15 +1759,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK2-NEXT: store i32 
0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1656,13 +1777,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1672,129 +1793,137 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca 
i64, align 8 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: 
store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// 
CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// 
CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], 
align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 
[[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: 
omp.loop.exit: -// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK2-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP53]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1804,15 +1933,15 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1825,109 +1954,119 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP21]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: 
[[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: 
omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// 
CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1940,7 +2079,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1951,12 +2090,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1964,12 +2105,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1979,129 +2120,139 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // 
CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], 
i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// 
CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store 
i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP29]], i64 32) // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: br i1 [[CMP9]], 
label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK2-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], 
i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2114,56 +2265,64 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // 
CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK2-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK2-NEXT: store i16 
[[CONV9]], ptr [[ARRAYIDX]], align 2 @@ -2171,17 +2330,17 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2191,6 +2350,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -2200,9 +2360,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: 
[[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2210,106 +2372,118 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 
-// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3) +// CHECK2-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24) +// CHECK2-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]) +// CHECK2-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], 
align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// 
CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2319,56 +2493,62 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: 
[[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -2377,7 +2557,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -2388,12 +2568,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2401,12 +2583,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2414,106 +2596,116 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca 
i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]) +// CHECK2-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK2-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD]], 
ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: -// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4 -// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2526,75 +2718,83 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 
[[CONV3]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 // CHECK2-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], 
i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr 
@[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK2-NEXT: ret void // // @@ -2603,7 +2803,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -2614,12 +2814,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: 
worker.exit: @@ -2627,12 +2829,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2645,141 +2847,151 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_11]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], 
ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // 
CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt 
i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// 
CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr -// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr -// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4) +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32) +// CHECK2-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_11]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = 
load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP33]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i64 32) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP15:%.*]] 
= icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] // CHECK2: cond.true16: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END18:%.*]] // CHECK2: cond.false17: -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END18]] // CHECK2: cond.end18: -// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE16]] ], [ [[TMP41]], [[COND_FALSE17]] ] +// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE16]] ], [ [[TMP46]], [[COND_FALSE17]] ] // CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // 
CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2796,108 +3008,116 @@ // CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// 
CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB4:%.*]] = 
sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] // CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 // CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = 
trunc i64 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP17]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP14]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 
[[CONV11]], [[TMP22]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] -// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP15]], [[MUL15]] +// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP23]], [[MUL15]] // CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I9]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 // CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]] -// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP18]], [[MUL20]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP26]], [[MUL20]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: 
[[SUB22:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV21]], [[MUL24]] -// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP17]], [[MUL25]] +// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP25]], [[MUL25]] // CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1 // CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]] // CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4 -// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP32]] to i64 // CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]] // CHECK2-NEXT: store i32 [[ADD29]], ptr [[ARRAYIDX31]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -2909,7 +3129,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -2921,13 +3141,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -2935,13 +3159,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2951,134 +3174,143 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_13]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 -// CHECK2-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK2-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK2-NEXT: store [[STRUCT_ANON_13]] [[TMP34]], ptr [[TMP33]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP33]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i64 40) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add 
nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE10]] ], [ [[TMP46]], 
[[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 
8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3091,76 +3323,84 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 
[[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store 
i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 
[[TMP19]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]] -// CHECK2-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM7]] +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -3172,8 +3412,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3185,15 +3424,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label 
[[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3201,13 +3442,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 
dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3217,127 +3458,135 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// 
CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK3-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 
// CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 
[[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // 
CHECK3-NEXT: br label [[COND_END13:%.*]] // CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END13]] // CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK3-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK3-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// 
CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP46]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP51]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3347,15 +3596,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3368,104 +3617,114 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 
[[TMP10]], ptr [[L]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4 +// 
CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP29]] // CHECK3-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 
4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[L]], align 4 +// CHECK3-NEXT: store i32 [[TMP40]], ptr [[L]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -3478,7 +3737,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -3489,12 +3748,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3502,12 +3763,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca 
ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3517,127 +3778,137 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// 
CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: 
call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP28:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP27]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_2]] [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP28]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP27]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]]) +// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret 
void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3650,52 +3921,60 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: 
br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2 @@ -3703,17 +3982,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3723,6 +4002,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -3732,9 +4012,11 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3742,104 +4024,116 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK3-NEXT: 
store ptr [[TMP12]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12) +// CHECK3-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_4]] [[TMP18]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP17]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i32 12) 
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// 
CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3849,52 +4143,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store 
i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void 
@__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -3903,7 +4203,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -3914,12 +4214,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3927,12 +4229,12 @@ // // // CHECK3-LABEL: define 
{{[^@]+}}@__omp_outlined__6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3940,104 +4242,114 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK3-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr -// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], 
align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP20]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_6]] [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP21]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP20]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP29]], 99 // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK3: cond.true6: // CHECK3-NEXT: br label [[COND_END8:%.*]] // CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END8]] // CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ] +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP30]], [[COND_FALSE7]] ] // CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) 
[[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4050,70 +4362,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] 
+// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[J]], align 4 // CHECK3-NEXT: store i32 10, ptr [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -4122,7 +4442,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -4133,12 +4453,14 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4146,12 +4468,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4164,25 +4486,31 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4190,118 +4518,122 @@ // CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP10]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() // CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP13]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], 1 +// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP19]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = 
inttoptr i32 [[TMP18]] to ptr -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to ptr -// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to ptr -// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4) +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr 
[[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16) +// CHECK3-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP32]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_8]] [[TMP34]], ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr null, ptr [[TMP33]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP32]], i32 16) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i64, 
ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP41]], [[TMP42]] // CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP43]], [[TMP44]] // CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] // CHECK3: cond.true18: -// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: br label [[COND_END20:%.*]] // CHECK3: cond.false19: -// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK3-NEXT: br label [[COND_END20]] // CHECK3: cond.end20: -// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE18]] ], [ [[TMP41]], [[COND_FALSE19]] ] +// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP45]], [[COND_TRUE18]] ], [ [[TMP46]], [[COND_FALSE19]] ] // CHECK3-NEXT: store i64 [[COND21]], ptr 
[[DOTOMP_COMB_UB]], align 8 -// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP47:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP47]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP49]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4318,21 +4650,29 @@ // CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// 
CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] @@ -4340,91 +4680,91 @@ // CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 // CHECK3-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: 
[[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP17]] to i64 // CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[CONV13]] // CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 // CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] // CHECK3-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] // CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = 
add nsw i64 0, [[MUL20]] // CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 // CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] // CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 // CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] // CHECK3-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 // CHECK3-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] // CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 // CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] // CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 // CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12]], align 4 -// 
CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i32 0, i32 [[TMP31]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP32]] // CHECK3-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX37]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP33]], [[TMP34]] // CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -4436,7 +4776,7 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -4448,13 +4788,17 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[V_ADDR]], align 4 
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__omp_outlined__10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -4462,13 +4806,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4478,132 +4821,141 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_10]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 91, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: 
[[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK3-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP30]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_10]] [[TMP32]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// 
CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP34]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr null, ptr [[TMP31]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP30]], i32 20) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// 
CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE10]] ], [ [[TMP44]], [[COND_FALSE11]] ] // CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP47]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // 
CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4616,71 +4968,79 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr 
[[V]], ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX5]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP29]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp 
b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -35,8 +35,7 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -48,15 +47,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -64,13 +65,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -80,138 +81,146 @@ // 
CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], 
align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40) +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1 +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// 
CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], 
[[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) +// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// 
CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -224,104 +233,114 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP11]] to i32 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP10]], [[CONV6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP21]] to i32 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP20]], [[CONV6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP22]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 
[[TMP25]], [[TMP26]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR7]] // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]] -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC]]) #[[ATTR7]] // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD12]], [[CALL13]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -333,8 +352,7 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -346,15 +364,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -362,13 +382,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -378,136 +398,144 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca 
i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: 
omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 -// 
CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4 +// 
CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20) +// CHECK2-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store 
i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) +// CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[ARGC:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -520,100 +548,110 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARGC]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP19]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP17]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: 
cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], 
[[TMP28]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP8]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]] -// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC]]) #[[ATTR7]] // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[TMP8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP36]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -65,8 +65,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -78,15 +77,17 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr 
[[TMP0]], i64 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -94,13 +95,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -110,141 +111,149 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = 
alloca [5 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK1-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 
[[SUB]], 1 // CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// 
CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], 
ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i64 
[[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP33:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP33]]), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP35:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP35]], ptr [[TMP34]], align 1, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP37]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP34]]), !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP33]], i64 40), !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: 
[[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] // CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[COND_END13:%.*]] // CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[COND_END13]] // CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] // CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: store i32 [[TMP48]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]]) -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 -// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP49:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP50]]) +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP53]], 0 // CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 -// CHECK1-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 +// CHECK1-NEXT: br i1 [[TMP55]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP51]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP56]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -254,15 +263,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = 
alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -275,121 +284,131 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// 
CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP19]], [[CONV5]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV7:%.*]] = 
trunc i64 [[TMP21]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], 
[[TMP27]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP16]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 // CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] // CHECK1-NEXT: store i32 [[ADD16]], ptr [[I4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[L]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], ptr [[L]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -402,7 +421,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -413,12 +432,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -426,12 +447,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) 
#[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -441,121 +462,131 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr 
i64 [[TMP15]] to ptr -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr -// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load 
i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP29:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP29]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP31:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP30]]), !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr 
[[TMP29]], i64 32), !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 
[[TMP38]], [[TMP39]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: -// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE10]] ], [ [[TMP43]], [[COND_FALSE11]] ] // CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group 
[[ACC_GRP19]] +// CHECK1-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]]) -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]]) +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK1-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -568,14 +599,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 
2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -588,56 +619,64 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 
4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// 
CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 // CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 // CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] @@ -645,23 +684,23 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 // CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -677,6 +716,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 @@ -686,9 +726,11 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -696,98 +738,110 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_5]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] 
], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]] -// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 24), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP20:%.*]] = load 
[[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store [[STRUCT_ANON_5]] [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP19]]), !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 24), !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// 
CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -796,13 +850,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -812,59 +866,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK1-NEXT: 
[[CONV2:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -877,7 +937,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 
@@ -888,12 +948,14 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -901,12 +963,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -914,97 +976,107 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_8]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, 
!llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP22:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP22]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP24:%.*]] = load [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 1, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store [[STRUCT_ANON_8]] [[TMP24]], ptr [[TMP23]], align 1, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP23]]), !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP22]], i64 32), !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// 
CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP31]], 99 // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK1: cond.true6: // CHECK1-NEXT: br label [[COND_END8:%.*]] // CHECK1: cond.false7: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[COND_END8]] // CHECK1: cond.end8: -// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP32]], [[COND_FALSE7]] ] // CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4 @@ -1014,14 +1086,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1034,78 +1106,86 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] 
-// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], [[MUL8]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], 
[[TMP26]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: store i32 10, ptr [[J]], align 4 @@ -1120,8 +1200,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1133,15 +1212,17 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// 
CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1149,13 +1230,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1165,139 +1246,147 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[L]], align 4 // CHECK2-NEXT: [[L1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP18]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// 
CHECK2-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// 
CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP31:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 20), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP32:%.*]] = call ptr 
@__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP31]]), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP33:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store [[STRUCT_ANON_0]] [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP32]]), !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP31]], i32 20), !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: 
[[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP42]], [[TMP43]] // CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[COND_END13:%.*]] // CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP12]] +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[COND_END13]] // CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP44]], [[COND_TRUE11]] ], [ [[TMP45]], [[COND_FALSE12]] ] // CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: store i32 [[TMP46]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]]) -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP47:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[TMP47]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP48]]) +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK2-NEXT: br i1 [[TMP50]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP51]], 0 // CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD17]], ptr [[I4]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK2-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-NEXT: br i1 [[TMP53]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP49]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP54]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1307,15 +1396,15 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1328,116 +1417,126 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[L]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP18]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP29]] // CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group 
[[ACC_GRP16]] +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP37]]) +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK2-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[L]], align 4 +// CHECK2-NEXT: store i32 [[TMP43]], ptr [[L]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1450,7 +1549,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -1461,12 +1560,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void 
@__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1474,12 +1575,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1489,119 +1590,129 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_2]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store 
i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: 
br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP16]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr -// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr -// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr -// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// 
CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP27:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP28:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP27]]), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP29:%.*]] = load [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store [[STRUCT_ANON_2]] [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr null, ptr [[TMP28]]), !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP27]], i32 16), !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 
[[TMP32]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: -// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ] +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] // CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: [[TMP37:%.*]] 
= load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]]) -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK2-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]]) +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK2-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 // CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] @@ -1614,14 +1725,14 @@ // // // 
CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1634,52 +1745,60 @@ // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 
[[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP16]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP8]], i32 0, i32 [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, 
!llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 // CHECK2-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]] @@ -1687,23 +1806,23 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP18]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK2-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP29]], 0 // CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1719,6 +1838,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 @@ -1728,9 +1848,11 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 
true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1738,96 +1860,108 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_4]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 10 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr 
@__omp_outlined__5, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 12), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store [[STRUCT_ANON_4]] [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr 
@[[GLOB4]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr null, ptr [[TMP17]]), !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i32 12), !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// 
CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK2-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1836,13 +1970,13 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1852,55 +1986,61 @@ // 
CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr 
[[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1913,7 +2053,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -1924,12 +2064,14 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], 
align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1937,12 +2079,12 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1950,95 +2092,105 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_6]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void 
@__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// 
CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP10]], 100 // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), 
!llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP20:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 16), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP22:%.*]] = load 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store [[STRUCT_ANON_6]] [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB4]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr null, ptr [[TMP21]]), !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP20]], i32 16), !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// 
CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP29]], 99 // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK2: cond.true6: // CHECK2-NEXT: br label [[COND_END8:%.*]] // CHECK2: cond.false7: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[COND_END8]] // CHECK2: cond.end8: -// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ] +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP30]], [[COND_FALSE7]] ] // CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK2-NEXT: store i32 [[TMP31]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK2-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4 @@ -2048,14 +2200,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2068,73 +2220,81 @@ // CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[F]], align 4 // CHECK2-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP18]], 10 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: store i32 10, ptr [[K]], align 4, 
!llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[F]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], [[MUL6]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[ARRAYIDX]], i32 0, i32 [[TMP24]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 10, ptr [[I]], align 4 // CHECK2-NEXT: store i32 10, ptr [[J]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- 
a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -80,6 +80,7 @@ // CHECK1-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -91,9 +92,11 @@ // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -102,16 +105,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -119,6 +124,7 @@ // CHECK1-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 @@ -130,9 +136,11 @@ // CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void 
@__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -141,16 +149,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK1-NEXT: ret void // // @@ -158,6 +168,7 @@ // CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca 
i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -169,9 +180,11 @@ // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -180,16 +193,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -197,6 +212,7 @@ // CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -208,9 +224,11 @@ // CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK2-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK2-NEXT: call void 
@__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void @@ -219,16 +237,18 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK2-NEXT: ret void // // @@ -238,6 +258,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -248,13 +269,15 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[ARGC3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC3]], align 4 +// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC3]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC3]], i64 4) +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -262,16 +285,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -281,6 +306,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -291,13 +317,15 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[ARGC2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC2]], align 8 +// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC2]]) #[[ATTR3]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC2]], i64 8) +// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -305,16 +333,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK3-NEXT: ret void // // @@ -324,6 +354,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -337,9 +368,11 @@ // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]] +// CHECK4-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void @@ -348,16 +381,18 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK4-NEXT: ret void // // @@ -367,6 +402,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -380,9 +416,11 @@ // CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) // CHECK4-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[ARGC1]], ptr [[TMP3]], align 4 // CHECK4-NEXT: store 
i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]] +// CHECK4-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4) // CHECK4-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK4-NEXT: ret void @@ -391,15 +429,17 @@ // // // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK4-NEXT: ret void // diff --git 
a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -54,6 +54,7 @@ // CHECK1-SAME: (i64 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8 @@ -65,9 +66,11 @@ // CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) // CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void @@ -76,38 +79,40 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias 
noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) -// CHECK1-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK1-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[E:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK1-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[E]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) // CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 // CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// 
CHECK1-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E]], i64 8) // CHECK1-NEXT: ret void // // @@ -125,51 +130,51 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, 
ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK1-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt 
i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK1-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], 
ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] @@ -195,41 +200,41 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] // CHECK1: 
ifcont4: -// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void @@ -245,14 +250,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK1-NEXT: ret void // // @@ -267,13 +272,13 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr 
[[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -287,14 +292,14 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr 
[[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -309,13 +314,13 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: 
store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -324,6 +329,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 @@ -333,17 +339,21 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK1-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK1-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store 
i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i64 4) -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i64 1) +// CHECK1-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -351,58 +361,59 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// 
CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) -// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) -// CHECK1-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK1-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK1-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK1-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK1-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], 
i64 0, i64 1 -// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK1-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK1-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[D]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr 
@_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK1-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK1-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK1-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: -// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4) -// 
CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D]], i64 4) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C]], i64 1) // CHECK1-NEXT: ret void // // @@ -421,71 +432,71 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK1-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK1-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK1-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// 
CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK1-NEXT: 
[[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK1-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK1-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK1-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK1-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr 
[[TMP58]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK1-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK1-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK1-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -511,25 +522,25 @@ // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK1-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) 
[[TMP10]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK1-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK1-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK1-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK1-NEXT: 
br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] @@ -538,25 +549,25 @@ // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP26:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -574,20 +585,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK1-NEXT: ret void // // @@ -602,17 +613,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// 
CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -626,20 +637,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK1-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK1-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // // @@ -654,17 +665,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -673,6 +684,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -682,9 +694,13 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) 
#[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -692,130 +708,138 @@ // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK1-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: 
[[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK1-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i64 16) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP13]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP11]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) // 
CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 // CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 // CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK1-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: call void 
@__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] 
to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP9]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) // CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// 
CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK1: cond.true9: -// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: br label [[COND_END11:%.*]] -// CHECK1: cond.false10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK1-NEXT: br label [[COND_END11]] -// CHECK1: cond.end11: -// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK1-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK1: cond.true7: +// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: br label [[COND_END9:%.*]] +// CHECK1: cond.false8: +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK1-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: ret void @@ -836,71 +860,71 @@ // CHECK1-NEXT: store i16 
[[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 
@__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// 
CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], 
[[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr 
[[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -926,25 +950,25 @@ // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] @@ -953,25 +977,25 @@ // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label 
[[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// 
CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -994,71 +1018,71 @@ // CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr 
[[TMP12]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK1-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK1-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK1-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK1-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = 
getelementptr i16, ptr [[TMP19]], i64 1 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK1-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: -// CHECK1-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK1: then5: -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 8 -// CHECK1-NEXT: 
[[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK1-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK1-NEXT: br label [[IFCONT7:%.*]] // CHECK1: else6: // CHECK1-NEXT: br label [[IFCONT7]] @@ -1084,25 +1108,25 @@ // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: -// 
CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK1: then2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: 
[[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK1-NEXT: br label [[IFCONT4:%.*]] // CHECK1: else3: // CHECK1-NEXT: br label [[IFCONT4]] @@ -1111,25 +1135,25 @@ // CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK1: then6: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK1-NEXT: br label [[IFCONT8:%.*]] // CHECK1: else7: // CHECK1-NEXT: br label [[IFCONT8]] // CHECK1: ifcont8: // CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 
[[TMP3]], [[TMP25]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK1: then10: -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK1-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK1-NEXT: br label [[IFCONT12:%.*]] // CHECK1: else11: // CHECK1-NEXT: br label [[IFCONT12]] @@ -1147,20 +1171,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr 
[[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK1-NEXT: ret void // // @@ -1175,17 +1199,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], 
align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -1199,20 +1223,20 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK1-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x 
i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK1-NEXT: ret void // // @@ -1227,17 +1251,17 @@ // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -1246,6 +1270,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 @@ -1257,9 +1282,11 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void 
@__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1267,38 +1294,40 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK2-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) -// CHECK2-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK2-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[E:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK2-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[E]], ptr [[TMP6]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr 
@_omp_reduction_global_to_list_reduce_func) // CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 // CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK2-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK2-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK2-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i32 8) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[E]], i32 8) // CHECK2-NEXT: ret void // // @@ -1316,51 +1345,51 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// 
CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK2-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: 
[[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK2-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK2-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK2-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK2-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK2-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK2-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK2-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK2-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: br i1 [[TMP32]], label 
[[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: -// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK2-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK2-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] @@ -1386,41 +1415,41 @@ // CHECK2-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK2-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 
[[TMP8]] -// CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] // CHECK2: ifcont4: -// CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void @@ -1436,14 +1465,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP11]], 
align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK2-NEXT: ret void // // @@ -1458,13 +1487,13 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void 
@"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1478,14 +1507,14 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK2-NEXT: ret void // // @@ -1500,13 +1529,13 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 
x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1515,6 +1544,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 @@ -1524,17 +1554,21 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK2-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK2-NEXT: 
[[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4) -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1) +// CHECK2-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1542,58 +1576,59 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK2-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK2-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK2-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], 
align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK2-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK2-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK2-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK2-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK2-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[C]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[D]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK2-NEXT: [[CONV2:%.*]] = sext 
i8 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK2-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK2-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK2-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D]], i32 4) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 1) // CHECK2-NEXT: ret void // // @@ -1612,71 +1647,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK2-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// 
CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK2-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 
0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK2-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK2-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4 +// 
CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK2-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK2-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK2-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK2-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 
[[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK2-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK2-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK2-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK2-NEXT: store float 
[[TMP54]], ptr [[TMP53]], align 4 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -1702,25 +1737,25 @@ // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK2-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK2-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK2-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK2-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] @@ -1729,25 +1764,25 @@ // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // 
CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK2-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -1765,20 +1800,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// 
CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK2-NEXT: ret void // // @@ -1793,17 +1828,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// 
CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1817,20 +1852,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK2-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK2-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// 
CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK2-NEXT: ret void // // @@ -1845,17 +1880,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 
x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -1864,6 +1899,7 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -1873,9 +1909,13 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK2-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -1883,130 +1923,138 @@ // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK2-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK2-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i32 8) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP13]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 1024, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP11]], ptr [[TMP14]], i32 1024, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) // CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 // CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK2-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK2-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK2-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK2-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// 
CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[COND]] to i16 +// CHECK2-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP9]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) // CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK2: cond.true9: -// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK2-NEXT: br label [[COND_END11:%.*]] -// CHECK2: cond.false10: -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK2-NEXT: br label [[COND_END11]] -// CHECK2: cond.end11: -// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK2-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK2-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], 
[[CONV5]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK2: cond.true7: +// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK2-NEXT: br label [[COND_END9:%.*]] +// CHECK2: cond.false8: +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK2-NEXT: br label [[COND_END9]] +// CHECK2: cond.end9: +// CHECK2-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK2-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: ret void @@ -2027,71 +2075,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr 
i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label 
[[ELSE:%.*]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK2-NEXT: store i16 
[[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: -// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 
0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -2117,25 +2165,25 @@ // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br 
i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// 
CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] @@ -2144,25 +2192,25 @@ // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -2185,71 +2233,71 @@ // CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK2-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK2-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 
[[TMP8]], 1 -// CHECK2-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK2-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK2-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK2-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, 
ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK2-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK2-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // 
CHECK2: ifcont: -// CHECK2-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK2: then5: -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK2-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK2-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// 
CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK2-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK2-NEXT: br label [[IFCONT7:%.*]] // CHECK2: else6: // CHECK2-NEXT: br label [[IFCONT7]] @@ -2275,25 +2323,25 @@ // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult 
i32 [[TMP3]], [[TMP13]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK2: then2: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK2-NEXT: br label [[IFCONT4:%.*]] // CHECK2: else3: // CHECK2-NEXT: br label [[IFCONT4]] @@ -2302,25 +2350,25 @@ // CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK2: then6: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK2-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK2-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK2-NEXT: br label [[IFCONT8:%.*]] // CHECK2: else7: // CHECK2-NEXT: br label [[IFCONT8]] // CHECK2: ifcont8: // CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK2: then10: -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK2-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK2-NEXT: br label [[IFCONT12:%.*]] // 
CHECK2: else11: // CHECK2-NEXT: br label [[IFCONT12]] @@ -2338,20 +2386,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK2-NEXT: ret void // // @@ -2366,17 +2414,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -2390,20 +2438,20 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK2-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK2-NEXT: ret void // // @@ -2418,17 +2466,17 @@ // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // // @@ -2437,6 +2485,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 @@ -2448,9 +2497,11 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2458,38 +2509,40 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) -// CHECK3-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK3-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[E:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[E]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK3-NEXT: store double [[ADD]], ptr [[E]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[E]], ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 
[[TMP3]], ptr [[TMP7]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP7]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) // CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 // CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[E]], align 8 +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store double [[ADD1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i32 8) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[E]], i32 8) // CHECK3-NEXT: ret void // // @@ -2507,51 +2560,51 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store 
i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK3-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK3-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr double, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK3-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK3-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK3-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK3-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK3-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK3-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: 
then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK3-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK3-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] @@ -2577,41 +2630,41 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK3-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] // CHECK3: ifcont4: -// CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void @@ -2627,14 +2680,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], 
ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP11]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP8]], align 128 // CHECK3-NEXT: ret void // // @@ -2649,13 +2702,13 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // 
CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP10]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -2669,14 +2722,14 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 128 -// CHECK3-NEXT: store double [[TMP12]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 
+// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8 // CHECK3-NEXT: ret void // // @@ -2691,13 +2744,13 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP10]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x double], ptr [[E]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -2706,6 +2759,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 @@ -2715,17 +2769,21 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK3-NEXT: [[C2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C2]], align 1 +// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 // CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[D3:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store float [[TMP2]], ptr [[D3]], align 4 +// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C2]], ptr [[D3]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D3]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C2]], i32 1) +// CHECK3-NEXT: call void @__omp_outlined__1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr 
[[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -2733,58 +2791,59 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK3-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: store i8 0, ptr [[C]], align 1 +// CHECK3-NEXT: store float 1.000000e+00, ptr [[D]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[C]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 // CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK3-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK3-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP10]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr 
@_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) -// CHECK3-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK3-NEXT: store i8 [[CONV1]], ptr [[C]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr [[D]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 +// CHECK3-NEXT: store float [[MUL]], ptr [[D]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[D]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP11]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4, ptr @_omp_reduction_list_to_global_copy_func5, ptr @_omp_reduction_list_to_global_reduce_func6, ptr @_omp_reduction_global_to_list_copy_func7, ptr @_omp_reduction_global_to_list_reduce_func8) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP0]], align 1 -// 
CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 -// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[C]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK3-NEXT: [[XOR4:%.*]] = xor i32 [[CONV2]], [[CONV3]] +// CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[XOR4]] to i8 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[TMP2]], align 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[D]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store float [[MUL6]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D]], i32 4) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C]], i32 1) // CHECK3-NEXT: ret void // // @@ -2803,71 +2862,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP10]], align 1 -// CHECK3-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i16 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[TMP17]] to i8 -// CHECK3-NEXT: store i8 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[TMP23]], i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: 
store i32 [[TMP32]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[TMP23]], i32 1 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 +// CHECK3-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) +// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK3-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 +// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP41:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP41]], 0 -// CHECK3-NEXT: [[TMP43:%.*]] = and i1 [[TMP40]], [[TMP42]] -// CHECK3-NEXT: [[TMP44:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP45:%.*]] = and i1 [[TMP43]], [[TMP44]] -// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP36]], [[TMP39]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP46]], [[TMP45]] -// CHECK3-NEXT: br i1 [[TMP47]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 
[[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP50:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP51:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP52:%.*]] = and i1 [[TMP50]], [[TMP51]] -// CHECK3-NEXT: br i1 [[TMP52]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP54]], align 1 -// CHECK3-NEXT: store i8 [[TMP57]], ptr [[TMP56]], align 1 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP60:%.*]] = load ptr, ptr [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = load float, ptr [[TMP60]], align 4 -// CHECK3-NEXT: store float [[TMP64]], ptr [[TMP63]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: 
[[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 +// CHECK3-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 +// CHECK3-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -2893,25 +2952,25 @@ // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: store volatile i8 [[TMP12]], ptr addrspace(3) [[TMP10]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1 +// CHECK3-NEXT: store volatile i8 [[TMP10]], 
ptr addrspace(3) [[TMP9]], align 1 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i8, ptr addrspace(3) [[TMP14]], align 1 -// CHECK3-NEXT: store i8 [[TMP18]], ptr [[TMP17]], align 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1 +// CHECK3-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] @@ -2920,25 +2979,25 @@ // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// 
CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP23]], ptr addrspace(3) [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP24]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load volatile i32, ptr addrspace(3) [[TMP25]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// 
CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -2956,20 +3015,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP10]], align 128 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP15]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP12]], align 128 // CHECK3-NEXT: ret void // // @@ -2984,17 +3043,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], 
align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP12]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3008,20 +3067,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP7]] -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 128 -// CHECK3-NEXT: store i8 [[TMP11]], ptr [[TMP9]], align 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 128 -// CHECK3-NEXT: store float [[TMP16]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 128 +// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // // @@ -3036,17 +3095,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, 
ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(ptr [[TMP12]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i8], ptr [[C]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x float], ptr [[D]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: 
call void @"_omp$reduction$reduction_func2"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3055,6 +3114,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -3064,9 +3124,13 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] +// CHECK3-NEXT: call void @__omp_outlined__9(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -3074,130 +3138,138 @@ // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] 
{ +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK3-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_3]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 4 +// CHECK3-NEXT: store [[STRUCT_ANON_3]] [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP11]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr null, ptr [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[TMP7]], i32 8) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP13]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[TMP14]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP11]], ptr [[TMP14]], i32 2048, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func15, ptr @_omp_reduction_inter_warp_copy_func16, ptr @_omp_reduction_list_to_global_copy_func17, ptr @_omp_reduction_list_to_global_reduce_func18, ptr @_omp_reduction_global_to_list_copy_func19, ptr @_omp_reduction_global_to_list_reduce_func20) // CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 // CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 // 
CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV1]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP4]], align 2 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[B2]], align 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[B]], align 2 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: // CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP11]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, 
ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store i16 -32768, ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[OR]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV1]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[CONV2:%.*]] 
= trunc i32 [[COND]] to i16 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB3]], i32 [[TMP9]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func12, ptr @_omp_reduction_inter_warp_copy_func13) // CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END11:%.*]] -// CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END11]] -// CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ 
[[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK3-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[OR3:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[OR3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV5:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[CONV4]], [[CONV5]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK3: cond.true7: +// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: br label [[COND_END9:%.*]] +// CHECK3: cond.false8: +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: br label [[COND_END9]] +// CHECK3: cond.end9: +// CHECK3-NEXT: [[COND10:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE7]] ], [ [[TMP19]], [[COND_FALSE8]] ] +// CHECK3-NEXT: store i16 [[COND10]], ptr [[TMP4]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP9]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: ret void @@ -3218,71 +3290,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 [[TMP28]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: 
[[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // 
CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr 
[[TMP46]], align 4 +// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -3308,25 +3380,25 @@ // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load 
i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: // CHECK3-NEXT: br label [[IFCONT4]] @@ -3335,25 +3407,25 @@ // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK3-NEXT: store volatile i16 
[[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK3-NEXT: store i16 
[[TMP24]], ptr [[TMP23]], align 2 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -3376,71 +3448,71 @@ // CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i16 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP17]]) -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP11]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i16 
[[TMP28]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP29]], i16 [[TMP7]], i16 [[TMP31]]) -// CHECK3-NEXT: [[TMP33:%.*]] = trunc i32 [[TMP32]] to i16 -// CHECK3-NEXT: store i16 [[TMP33]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr i16, ptr [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP39:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP42:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp eq i16 [[TMP42]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP41]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP46:%.*]] = and i1 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: [[TMP47:%.*]] = or i1 [[TMP37]], [[TMP40]] -// CHECK3-NEXT: [[TMP48:%.*]] = or i1 [[TMP47]], [[TMP46]] -// CHECK3-NEXT: br i1 [[TMP48]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 +// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 +// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 +// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) +// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 +// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 
[[TMP7]], 2 +// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] +// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] +// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: -// CHECK3-NEXT: [[TMP51:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP52:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP53:%.*]] = and i1 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: br i1 [[TMP53]], label [[THEN5:%.*]], label [[ELSE6:%.*]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] // CHECK3: then5: -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP56:%.*]] = load ptr, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP59:%.*]] = load ptr, ptr [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP60]], ptr [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP64:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP64]], align 4 -// CHECK3-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP67]], ptr [[TMP66]], align 2 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 +// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 // CHECK3-NEXT: br label [[IFCONT7:%.*]] // CHECK3: else6: // CHECK3-NEXT: br label [[IFCONT7]] @@ -3466,25 +3538,25 @@ // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4 // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP13]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK3: then2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 // CHECK3-NEXT: br label [[IFCONT4:%.*]] // CHECK3: else3: 
// CHECK3-NEXT: br label [[IFCONT4]] @@ -3493,25 +3565,25 @@ // CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 // CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]] // CHECK3: then6: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i16, ptr [[TMP20]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP24]], ptr addrspace(3) [[TMP22]], align 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2 // CHECK3-NEXT: br label [[IFCONT8:%.*]] // CHECK3: else7: // CHECK3-NEXT: br label [[IFCONT8]] // CHECK3: ifcont8: // CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP25]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]] // CHECK3: then10: -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], 
i32 0, i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load volatile i16, ptr addrspace(3) [[TMP26]], align 2 -// CHECK3-NEXT: store i16 [[TMP31]], ptr [[TMP29]], align 2 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2 +// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2 // CHECK3-NEXT: br label [[IFCONT12:%.*]] // CHECK3: else11: // CHECK3-NEXT: br label [[IFCONT12]] @@ -3529,20 +3601,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 128 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr 
[[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP16]], align 128 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 128 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP12]], align 128 // CHECK3-NEXT: ret void // // @@ -3557,17 +3629,17 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 
4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP13]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]] // CHECK3-NEXT: ret void // // @@ -3581,20 +3653,20 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTADDR1]], 
align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP16]], align 128 -// CHECK3-NEXT: store i16 [[TMP17]], ptr [[TMP14]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 128 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2048 x i16], 
ptr [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 128 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2 // CHECK3-NEXT: ret void // // @@ -3609,16 +3681,16 @@ // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP13]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2048 x i32], ptr [[A]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2048 x i16], ptr [[B]], i32 0, i32 [[TMP4]] +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/openmp_win_codegen.cpp b/clang/test/OpenMP/openmp_win_codegen.cpp --- a/clang/test/OpenMP/openmp_win_codegen.cpp +++ b/clang/test/OpenMP/openmp_win_codegen.cpp @@ -49,34 +49,38 @@ // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @"?main@Test@@SAXXZ"() -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: invoke void @"?foo@@YAXXZ"() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[CATCH_DISPATCH:%.*]] // CHECK1: catch.dispatch: -// CHECK1-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] +// CHECK1-NEXT: [[TMP1:%.*]] = catchswitch within none [label %catch] unwind label [[TERMINATE:%.*]] // CHECK1: catch: -// CHECK1-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [ptr @"??_R0H@8", i32 0, ptr %t] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP1]]) ] +// CHECK1-NEXT: [[TMP2:%.*]] = catchpad within [[TMP1]] [ptr @"??_R0H@8", i32 0, ptr %t] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: invoke void @"?bar@@YAXXZ"() [ "funclet"(token [[TMP2]]) ] // CHECK1-NEXT: to label [[INVOKE_CONT1:%.*]] unwind label [[TERMINATE2:%.*]] // CHECK1: invoke.cont1: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP3]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP1]]) ] -// CHECK1-NEXT: catchret from [[TMP1]] to label [[CATCHRET_DEST:%.*]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.var) [ "funclet"(token [[TMP2]]) ] +// CHECK1-NEXT: catchret from [[TMP2]] to label [[CATCHRET_DEST:%.*]] // CHECK1: catchret.dest: // CHECK1-NEXT: br label [[TRY_CONT:%.*]] // CHECK1: try.cont: @@ -84,47 +88,49 @@ // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[TRY_CONT]] // CHECK1: terminate: -// CHECK1-NEXT: [[TMP4:%.*]] = cleanuppad within none [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP4]]) ] +// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within none [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7:[0-9]+]] [ "funclet"(token [[TMP5]]) ] // CHECK1-NEXT: unreachable // CHECK1: terminate2: -// CHECK1-NEXT: [[TMP5:%.*]] = cleanuppad within [[TMP1]] [] -// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP5]]) ] +// CHECK1-NEXT: [[TMP6:%.*]] = cleanuppad within [[TMP2]] [] +// CHECK1-NEXT: call void @"?terminate@@YAXXZ"() #[[ATTR7]] [ "funclet"(token [[TMP6]]) ] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR2]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LOCAL_J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[J_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 3, ptr [[LOCAL_J]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: 
store i32 4, ptr [[LOCAL_J]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[LOCAL_J]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[LOCAL_J]], ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP2]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func, i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func, i32 [[TMP8]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[LOCAL_J]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -136,12 +142,12 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, 
i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4 // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -568,29 +568,29 @@ // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add i32 
[[TMP10]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group !3 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -641,30 +641,30 @@ // CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] // CHECK1: omp.inner.for.cond29: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK1: omp.inner.for.body32: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]] -// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group !7 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // 
CHECK1: omp.body.continue37: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK1: omp.inner.for.inc38: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end40: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -1188,31 +1188,31 @@ // CHECK1-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1-IRBUILDER: omp.inner.for.cond: -// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 // CHECK1-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] // CHECK1-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body: -// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 -// 
CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 // CHECK1-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group !3 -// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]] -// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]] // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body.ordered.after: // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1-IRBUILDER: omp.body.continue: // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1-IRBUILDER: omp.inner.for.inc: -// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // 
CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1-IRBUILDER: omp.inner.for.end: // CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1265,33 +1265,33 @@ // CHECK1-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4 // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] // CHECK1-IRBUILDER: omp.inner.for.cond30: -// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] // CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33: -// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 // CHECK1-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 
[[TMP29]], [[MUL34]] -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 // CHECK1-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]] -// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group !7 -// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after: // CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] // CHECK1-IRBUILDER: omp.body.continue38: // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] // CHECK1-IRBUILDER: omp.inner.for.inc39: -// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]]) -// CHECK1-IRBUILDER-NEXT: 
call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]] // CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1-IRBUILDER: omp.inner.for.end42: // CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -1793,29 +1793,29 @@ // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK3-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group !3 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, 
!llvm.access.group [[ACC_GRP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group !3 +// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1866,30 +1866,30 @@ // CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] // CHECK3: omp.inner.for.cond29: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP7]] // CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK3: omp.inner.for.body32: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK3-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]] -// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group !7 +// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // CHECK3: omp.body.continue37: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK3: omp.inner.for.inc38: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP33:%.*]] = load 
i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK3-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK3-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end40: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2413,31 +2413,31 @@ // CHECK3-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3-IRBUILDER: omp.inner.for.cond: -// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK3-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 // CHECK3-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] // CHECK3-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body: -// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 -// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK3-IRBUILDER-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 // CHECK3-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group !3 -// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK3-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK3-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]] -// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group !3 -// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]] // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body.ordered.after: // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3-IRBUILDER: omp.body.continue: // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3-IRBUILDER: omp.inner.for.inc: -// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK3-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1 -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // 
CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK3-IRBUILDER: omp.inner.for.end: // CHECK3-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2490,33 +2490,33 @@ // CHECK3-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4 // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]] // CHECK3-IRBUILDER: omp.inner.for.cond30: -// CHECK3-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1 // CHECK3-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]] // CHECK3-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33: -// CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1 // CHECK3-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]] -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], 
align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 // CHECK3-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]] -// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group !7 -// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]] // CHECK3-IRBUILDER: omp.inner.for.body33.ordered.after: // CHECK3-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]] // CHECK3-IRBUILDER: omp.body.continue38: // CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]] // CHECK3-IRBUILDER: omp.inner.for.inc39: -// CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1 -// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]]) -// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group !7 +// CHECK3-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]] // 
CHECK3-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3-IRBUILDER: omp.inner.for.end42: // CHECK3-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]] @@ -2855,32 +2855,32 @@ // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1 // CHECK5-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]] // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]] -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
float], ptr @f, i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2921,32 +2921,32 @@ // CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV30]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND32:%.*]] // CHECK5: omp.inner.for.cond32: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 
4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: [[ADD33:%.*]] = add i32 [[TMP27]], 1 // CHECK5-NEXT: [[CMP34:%.*]] = icmp ult i32 [[TMP26]], [[ADD33]] // CHECK5-NEXT: br i1 [[CMP34]], label [[OMP_INNER_FOR_BODY35:%.*]], label [[OMP_INNER_FOR_END45:%.*]] // CHECK5: omp.inner.for.body35: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: [[MUL36:%.*]] = mul i32 [[TMP29]], 1 // CHECK5-NEXT: [[ADD37:%.*]] = add i32 [[TMP28]], [[MUL36]] -// CHECK5-NEXT: store i32 [[ADD37]], ptr [[I31]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: store i32 [[ADD37]], ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP30]] to i64 // CHECK5-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM38]] -// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !13 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX39]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[I31]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: [[IDXPROM40:%.*]] = sext i32 [[TMP31]] to i64 // CHECK5-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 
0, i64 [[IDXPROM40]] -// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX41]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: store float 1.000000e+00, ptr [[ARRAYIDX41]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE42:%.*]] // CHECK5: omp.body.continue42: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC43:%.*]] // CHECK5: omp.inner.for.inc43: -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: [[ADD44:%.*]] = add i32 [[TMP32]], 1 -// CHECK5-NEXT: store i32 [[ADD44]], ptr [[DOTOMP_IV30]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: store i32 [[ADD44]], ptr [[DOTOMP_IV30]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND32]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end45: // CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_19]], align 4 diff --git a/clang/test/OpenMP/outlined_artificial.c b/clang/test/OpenMP/outlined_artificial.c --- a/clang/test/OpenMP/outlined_artificial.c +++ b/clang/test/OpenMP/outlined_artificial.c @@ -54,14 +54,11 @@ // CHECK-DAG: !DISubprogram(name: "__captured_stmt" // CHECK-DAG-SAME: flags: DIFlagArtificial -// CHECK-DAG: !DISubprogram(name: ".omp_outlined._debug__" +// CHECK-DAG: !DISubprogram(name: ".omp_outlined." // CHECK-DAG-SAME: flags: DIFlagArtificial // CHECK-DAG: !DISubprogram(linkageName: ".omp_task_entry." // CHECK-DAG-SAME: flags: DIFlagArtificial -// CHECK-DAG: !DISubprogram(name: ".omp_outlined." 
-// CHECK-DAG-SAME: flags: DIFlagArtificial - // CHECK-DAG: !DISubprogram(name: ".omp_outlined..1" // CHECK-DAG-SAME: flags: DIFlagArtificial diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -78,6 +78,9 @@ // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -87,45 +90,56 @@ // CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[VLA]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..3, i64 [[TMP1]], ptr [[VLA]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP8]]) // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP10]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable 
// // @@ -145,105 +159,121 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[GLOBAL]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP9:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP10]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i64 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: 
[[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -251,6 +281,7 @@ // CHECK1-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0 @@ -258,38 +289,43 @@ // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[ARGC_ADDR]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] 
= load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP5]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP7]] // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0 // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP9]]) #[[ATTR6]] // CHECK1-NEXT: unreachable // // @@ -309,6 +345,9 @@ // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG19:![0-9]+]] @@ -322,59 +361,69 @@ // CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG23]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] // CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB5:[0-9]+]], i32 1, ptr @.omp_outlined..4, i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 2, ptr @.omp_outlined..8, i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG34:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP3]]), !dbg [[DBG36:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG32:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG32]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP4]], align 8, !dbg [[DBG32]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG32]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0, !dbg [[DBG33:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP5]], align 8, !dbg [[DBG33]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB5:[0-9]+]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]), !dbg [[DBG33]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0, !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP7]], align 8, !dbg [[DBG34]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]), !dbg [[DBG34]] +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(ptr noundef [[TMP8]]), !dbg [[DBG36:![0-9]+]] // CHECK2-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4, !dbg [[DBG37:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP4]]), !dbg [[DBG38]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG38]] -// CHECK2-NEXT: ret i32 [[TMP5]], !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP9]]), !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4, !dbg [[DBG38]] +// CHECK2-NEXT: ret i32 [[TMP10]], !dbg [[DBG38]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG54]] -// CHECK2-NEXT: invoke void 
@_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG54:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG54]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG55]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG56:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG55:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG55]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG57:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG58:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG59:![0-9]+]] 
+// CHECK2-NEXT: ret void, !dbg [[DBG57]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG53]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG53]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]] -// CHECK2-NEXT: unreachable, !dbg [[DBG53]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG56]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG56]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR7:[0-9]+]], !dbg [[DBG56]] +// CHECK2-NEXT: unreachable, !dbg [[DBG56]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG60:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG67:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -384,331 +433,204 @@ // CHECK2-NEXT: unreachable // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG70:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: call void @.omp_outlined._debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG72]] -// CHECK2-NEXT: ret void, !dbg [[DBG72]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL]], 
metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG82]] -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] -// CHECK2-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined..3, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]), !dbg [[DBG82]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP2]]), !dbg [[DBG86]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG81:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG81]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP3:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG81]] +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[SAVED_STACK]], align 8, !dbg 
[[DBG81]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP2]], align 16, !dbg [[DBG81]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG81]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK2-NEXT: store ptr [[VLA]], ptr [[TMP5]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG85]] +// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[TMP6]], align 8, !dbg [[DBG85]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]), !dbg [[DBG86]] // CHECK2-NEXT: ret void, !dbg [[DBG88:![0-9]+]] // // -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] -// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG101]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG101:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG101]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP7]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG102:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG103]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG100]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG100]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]] -// CHECK2-NEXT: unreachable, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP9:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG102]] +// CHECK2-NEXT: [[TMP10:%.*]] = extractvalue { ptr, i32 } [[TMP9]], 0, !dbg [[DBG102]] +// CHECK2-NEXT: call void 
@__clang_call_terminate(ptr [[TMP10]]) #[[ATTR7]], !dbg [[DBG102]] +// CHECK2-NEXT: unreachable, !dbg [[DBG102]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG106:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.2(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]] -// CHECK2-NEXT: ret void, !dbg [[DBG112]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG117:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg 
[[DBG117]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG117]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG118]] +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP6]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG118]] +// CHECK2-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.1(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]] -// CHECK2-NEXT: ret void, !dbg [[DBG118]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG120:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG121]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG126]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 2, ptr @.omp_outlined..7, i64 [[TMP0]], ptr [[TMP1]]), !dbg [[DBG126]] -// CHECK2-NEXT: ret void, !dbg [[DBG127:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG128:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG135]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG136]] -// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG131:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG131]] +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG131]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG132:![0-9]+]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG132]] +// CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG133:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg 
[[DBG137:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG138:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG139:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG137]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG134:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 1, !dbg [[DBG135:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG136:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG134]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG135]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG135]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]] -// CHECK2-NEXT: unreachable, !dbg [[DBG135]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// 
CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.6(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]] -// CHECK2-NEXT: ret void, !dbg [[DBG146]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], 
metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.5(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]] -// CHECK2-NEXT: ret void, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG133]] +// CHECK2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG133]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP8]]) #[[ATTR7]], !dbg [[DBG133]] +// CHECK2-NEXT: unreachable, !dbg [[DBG133]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG137:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata 
[[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG161]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG161]] -// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]] -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 2, ptr @.omp_outlined..10, ptr [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]] -// CHECK2-NEXT: ret i32 0, !dbg [[DBG164:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG165:![0-9]+]] { +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG144:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG144]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG144]] +// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG145:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 0, !dbg [[DBG146:![0-9]+]] +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP4]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG146]] +// CHECK2-NEXT: store i64 [[TMP3]], ptr [[TMP5]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB11:[0-9]+]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG146]] +// CHECK2-NEXT: ret i32 0, !dbg [[DBG147:![0-9]+]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG148:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata 
[[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG175]] -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] -// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG159:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG159]] +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG160:![0-9]+]] +// CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP5]]) +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG162:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META179:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG186:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG187]] -// CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG171:![0-9]+]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nsw i64 0, [[TMP4]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP7]], !dbg [[DBG171]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 0, !dbg [[DBG171]] +// CHECK2-NEXT: ret void, !dbg [[DBG172:![0-9]+]] // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG178]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG178]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]] -// CHECK2-NEXT: unreachable, !dbg [[DBG178]] +// CHECK2-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: catch ptr null, !dbg [[DBG162]] +// CHECK2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP8]], 0, !dbg [[DBG162]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP9]]) #[[ATTR7]], !dbg [[DBG162]] +// CHECK2-NEXT: unreachable, !dbg [[DBG162]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: store ptr 
[[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG194:![0-9]+]] -// -// -// CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG173:![0-9]+]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] -// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.9(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]] -// CHECK2-NEXT: ret void, !dbg [[DBG201]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG178:![0-9]+]] // // // CHECK3-LABEL: define {{[^@]+}}@main @@ -765,6 +687,8 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 // CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1 // CHECK3-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] @@ -830,6 +754,8 @@ // CHECK3-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0 +// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] +// CHECK3: omp.par.region.parallel.after: // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] @@ -903,7 +829,9 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG35]] // CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] // CHECK4-NEXT: store i32 [[TMP3]], ptr [[ARRAYIDX1]], align 4, !dbg [[DBG35]] -// CHECK4-NEXT: br label 
[[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG35]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] // CHECK4: omp.par.outlined.exit.exitStub: @@ -971,7 +899,9 @@ // CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]] // CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0, !dbg [[DBG66]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG67:![0-9]+]] +// CHECK4: omp.par.region.parallel.after: +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]] // CHECK4: omp.par.outlined.exit.exitStub: diff --git a/clang/test/OpenMP/parallel_copyin_codegen.cpp b/clang/test/OpenMP/parallel_copyin_codegen.cpp --- a/clang/test/OpenMP/parallel_copyin_codegen.cpp +++ b/clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -215,6 +215,8 @@ // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -251,10 +253,10 @@ // CHECK1-NEXT: call void 
@__cxa_guard_release(ptr @_ZGVZ4mainE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..3) -// CHECK1-NEXT: [[CALL6:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL6]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL7]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[RETVAL]], align 4 // CHECK1-NEXT: ret i32 [[TMP8]] @@ -374,27 +376,30 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) 
+// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 @_ZZ4mainE3vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 @_ZZ4mainE3vec, i64 8, i1 false) +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) 
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ4mainE5s_arr, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -402,57 +407,62 @@ // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) 
+// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP12]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP14]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) 
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3vec, i64 8, ptr @_ZZ4mainE3vec.cache.) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP13]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE3var, i64 4, ptr @_ZZ4mainE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5s_arr, i64 8, ptr @_ZZ4mainE5s_arr.cache.) +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 
[[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ4mainE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5t_var, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) 
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ4mainE5t_var, i64 4, ptr @_ZZ4mainE5t_var.cache.) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR2]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]], ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -468,7 +478,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_threadprivate_register(ptr @[[GLOB1]], ptr @_ZZ5tmainIiET_vE5s_arr, ptr @.__kmpc_global_ctor_..4, ptr null, ptr @.__kmpc_global_dtor_..5) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE5s_arr, i32 noundef 1) -// 
CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_0]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor.6, ptr null, ptr @__dso_handle) #[[ATTR3]] // CHECK1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ5tmainIiET_vE5s_arr) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END]] @@ -488,8 +498,8 @@ // CHECK1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ5tmainIiET_vE3var) #[[ATTR3]] // CHECK1-NEXT: br label [[INIT_END5]] // CHECK1: init.end5: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..9) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_6]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -574,9 +584,9 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP1]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: ret ptr [[TMP2]] @@ -601,11 +611,11 @@ // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[TMP1]], i64 2 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[TMP1]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// 
CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -620,8 +630,8 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_0:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_1:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @_ZZ5tmainIiET_vE5s_arr // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -651,73 +661,79 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: 
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) 
+// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 128 -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP6]], ptr align 128 @_ZZ5tmainIiET_vE3vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 128 +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP7]], ptr align 128 @_ZZ5tmainIiET_vE3vec, i64 8, i1 false) +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) 
+// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ5tmainIiET_vE5s_arr, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done1: -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr 
@_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP12]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) 
-// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP14]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3vec, i64 8, ptr @_ZZ5tmainIiET_vE3vec.cache.) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP13]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE3var, i64 4, ptr @_ZZ5tmainIiET_vE3var.cache.) +// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 8, ptr @_ZZ5tmainIiET_vE5s_arr.cache.) 
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP15]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP14]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) 
-// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP3]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ5tmainIiET_vE5t_var, i64 4, ptr @_ZZ5tmainIiET_vE5t_var.cache.) +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ5tmainIiET_vE5t_var to i64), [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK1: copyin.not.master: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK1-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK1: copyin.not.master.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -728,7 +744,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 
// CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP0]], ptr @g, i64 4, ptr @g.cache.) // CHECK1-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 128 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[F]], align 4 @@ -753,7 +769,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP0]], ptr @g, i64 4, ptr @g.cache.) // CHECK1-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 128 @@ -773,27 +789,30 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP3]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) 
+// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP4]] +// CHECK3-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK3: copyin.not.master: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr @g, align 128 -// CHECK3-NEXT: store volatile i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr @g, align 128 +// CHECK3-NEXT: store volatile i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK3-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK3: copyin.not.master.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK3-NEXT: store volatile i32 1, ptr [[TMP6]], align 128 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) +// CHECK3-NEXT: store volatile i32 1, ptr [[TMP7]], align 128 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -813,35 +832,39 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP3]] -// CHECK4-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) 
+// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @g to i64), [[TMP4]] +// CHECK4-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK4: copyin.not.master: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr @g, align 128 -// CHECK4-NEXT: store volatile i32 [[TMP5]], ptr [[TMP2]], align 128 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr @g, align 128 +// CHECK4-NEXT: store volatile i32 [[TMP6]], ptr [[TMP3]], align 128 // CHECK4-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK4: copyin.not.master.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @g, i64 4, ptr @g.cache.) -// CHECK4-NEXT: store volatile i32 1, ptr [[TMP6]], align 128 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 -// CHECK4-NEXT: call void [[TMP7]](ptr noundef @__block_literal_global.2) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @g, i64 4, ptr @g.cache.) 
+// CHECK4-NEXT: store volatile i32 1, ptr [[TMP7]], align 128 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 +// CHECK4-NEXT: call void [[TMP8]](ptr noundef @__block_literal_global.2) // CHECK4-NEXT: ret void // // @@ -861,6 +884,7 @@ // CHECK5-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ10array_funcvE1s acquire, align 8 // CHECK5-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK5-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -883,7 +907,7 @@ // CHECK5-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ10array_funcvE1s) #[[ATTR1]] // CHECK5-NEXT: br label [[INIT_END]] // CHECK5: init.end: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // @@ -962,24 +986,27 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ10array_funcvE1a, i64 8, ptr @_ZZ10array_funcvE1a.cache.) -// CHECK5-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ10array_funcvE1a to i64), [[TMP3]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ10array_funcvE1a, i64 8, ptr @_ZZ10array_funcvE1a.cache.) 
+// CHECK5-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @_ZZ10array_funcvE1a to i64), [[TMP4]] +// CHECK5-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK5: copyin.not.master: -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 @_ZZ10array_funcvE1a, i64 8, i1 false) -// CHECK5-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @_ZZ10array_funcvE1s, i64 16, ptr @_ZZ10array_funcvE1s.cache.) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 @_ZZ10array_funcvE1a, i64 8, i1 false) +// CHECK5-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @_ZZ10array_funcvE1s, i64 16, ptr @_ZZ10array_funcvE1s.cache.) 
+// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ @_ZZ10array_funcvE1s, [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] @@ -987,12 +1014,12 @@ // CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) ptr @_ZN2StaSERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done1: // CHECK5-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK5: copyin.not.master.end: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // @@ -1035,6 +1062,8 @@ // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -1059,18 +1088,28 @@ // CHECK11-NEXT: store i8 1, ptr @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: [[TMP6:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP4]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP8]]) -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK11-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// CHECK11-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK11-NEXT: store i32 [[CALL5]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP14]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1134,102 +1173,105 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store ptr 
[[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] -// CHECK11-NEXT: br i1 [[TMP7]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: br i1 [[TMP12]], label [[COPYIN_NOT_MASTER:%.*]], label 
[[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP10:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP14]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP15]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP12:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP17]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP17]], i64 0, i64 0 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) -// CHECK11-NEXT: [[TMP19:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) -// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP19]], i64 0, i64 0 -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP24:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP24]], 
i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) -// CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = 
call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK11-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP10:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK11-SAME: () #[[ATTR2]] comdat { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK11-NEXT: 
[[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]], ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) @@ -1239,7 +1281,7 @@ // CHECK11-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3]] // CHECK11: init.check: // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE5s_arr, i32 noundef 1) -// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_0]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) +// CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S_1]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 1), i32 noundef 2) // CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor.2, ptr null, ptr @__dso_handle) #[[ATTR4]] // CHECK11-NEXT: store i8 1, ptr @_ZGVZ5tmainIiET_vE5s_arr, align 8 // CHECK11-NEXT: br label [[INIT_END]] @@ -1253,13 +1295,23 @@ // CHECK11-NEXT: store i8 1, ptr @_ZGVZ5tmainIiET_vE3var, align 8 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// 
CHECK11-NEXT: [[TMP6:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[TMP7:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP4]], ptr [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP8]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_4]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: ret i32 0 // @@ -1356,8 +1408,8 @@ // CHECK11-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_0:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S_1:%.*]], ptr @_ZZ5tmainIiET_vE5s_arr, i64 2), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @_ZZ5tmainIiET_vE5s_arr // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1366,90 +1418,91 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] -// CHECK11-NEXT: br i1 [[TMP7]], label 
[[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: br i1 [[TMP12]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP4]], align 128 -// CHECK11-NEXT: [[TMP9:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP9]], ptr align 128 [[TMP1]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP10]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr 
[[ARRAY_BEGIN]], i64 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP9]], align 128 +// CHECK11-NEXT: [[TMP14:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP14]], ptr align 128 [[TMP4]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP15:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP15]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[TMP12:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP17]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK11-NEXT: [[TMP15:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 128 -// CHECK11-NEXT: [[TMP17:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP17]], 
i64 0, i64 0 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[TMP18:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: [[TMP19:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) -// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP19]], i64 0, i64 0 -// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP20:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 128 +// CHECK11-NEXT: [[TMP22:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[TMP23:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP24:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP24]], i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define 
{{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) -// CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// 
CHECK11-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP9]]) // CHECK11-NEXT: ret void // // @@ -1459,7 +1512,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[F]], align 4 @@ -1483,7 +1536,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, 
i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) // CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[TMP1]], align 128 @@ -1509,31 +1562,33 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 1 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK13-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK13-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK13-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK13-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK13-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK13-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK13-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK13-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK13: copyin.not.master: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK13-NEXT: store volatile i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK13-NEXT: store volatile i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK13-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK13: copyin.not.master.end: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK13-NEXT: store volatile i32 1, ptr [[TMP8]], align 128 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK13-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK13-NEXT: store volatile i32 1, ptr [[TMP10]], align 128 // CHECK13-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK13-NEXT: ret void // @@ -1559,40 +1614,45 @@ // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 
8 // CHECK14-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// 
CHECK14-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK14-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK14-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK14-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK14-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK14-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK14: copyin.not.master: -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK14-NEXT: store volatile i32 [[TMP5]], ptr [[TMP1]], align 128 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK14-NEXT: store volatile i32 [[TMP7]], ptr [[TMP3]], align 128 // CHECK14-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK14: copyin.not.master.end: -// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK14-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP8:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) -// CHECK14-NEXT: store volatile i32 1, ptr [[TMP8]], align 128 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 -// CHECK14-NEXT: call void [[TMP9]](ptr noundef @__block_literal_global.2) +// CHECK14-NEXT: [[TMP8:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK14-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK14-NEXT: [[TMP10:%.*]] = call align 128 ptr @llvm.threadlocal.address.p0(ptr align 128 @g) +// CHECK14-NEXT: store volatile i32 1, ptr [[TMP10]], align 128 +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr @__block_literal_global.2, i32 0, i32 3), align 8 +// CHECK14-NEXT: call void [[TMP11]](ptr noundef @__block_literal_global.2) // CHECK14-NEXT: ret void // // @@ -1617,6 +1677,7 @@ // CHECK15-LABEL: define {{[^@]+}}@_Z10array_funcv // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK15-NEXT: [[TMP0:%.*]] = load i8, ptr @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 // CHECK15-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !prof [[PROF3:![0-9]+]] @@ -1633,9 +1694,13 @@ // CHECK15-NEXT: store i8 1, ptr @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: br label [[INIT_END]] // CHECK15: init.end: -// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) -// CHECK15-NEXT: [[TMP3:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP2]], ptr [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) +// CHECK15-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) +// CHECK15-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // @@ -1676,44 +1741,45 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[S:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK15-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) -// CHECK15-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK15-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK15-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] -// CHECK15-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK15-NEXT: [[TMP5:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK15-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK15-NEXT: [[TMP8:%.*]] = icmp ne i64 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: br i1 [[TMP8]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK15: copyin.not.master: -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK15-NEXT: [[TMP6:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP6]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[TMP5]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK15-NEXT: [[TMP9:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], ptr [[TMP9]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_ST:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] // CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) ptr @_ZN2StaSERKS_(ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK15: omp.arraycpy.done1: // CHECK15-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK15: 
copyin.not.master.end: -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK15-NEXT: ret void // // @@ -1762,49 +1828,58 @@ // CHECK16-LABEL: define {{[^@]+}}@_Z3foov // CHECK16-SAME: () #[[ATTR3:[0-9]+]] { // CHECK16-NEXT: entry: -// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK16-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = call ptr @_ZTW1t() -// CHECK16-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK16-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK16-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T:%.*]]) #[[ATTR4]] { +// CHECK16-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK16-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CHECK16-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK16-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK16-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = call ptr @_ZTW1t() -// CHECK16-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK16-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK16-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK16-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK16-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK16-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK16-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK16-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK16-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK16: copyin.not.master: -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK16-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK16-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK16: copyin.not.master.end: -// CHECK16-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK16-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) -// CHECK16-NEXT: [[TMP8:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[INC]], ptr [[TMP8]], align 4 +// CHECK16-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK16-NEXT: [[TMP10:%.*]] = call ptr @_ZTW1t() +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK16-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK16-NEXT: store i32 [[INC]], ptr [[TMP10]], align 4 // CHECK16-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp --- 
a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -249,10 +249,9 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr nonnull align 4 dereferenceable(28) [[SS]], ptr nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) @@ -263,34 +262,40 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[VEC]], i32 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP7]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP10]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -330,66 +335,70 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// 
CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], 
ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull 
align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i32 
4, i1 false) +// CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -437,21 +446,26 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 
-// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i32 4, ptr inttoptr (i32 1 to ptr)) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i32 4, ptr inttoptr (i32 1 to ptr)) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTT_VAR__VOID_ADDR]], align 4 -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i32 1 to ptr)) +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i32 1 to ptr)) // CHECK1-NEXT: ret void // // @@ -459,38 +473,50 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -502,9 +528,7 @@ // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -531,65 +555,76 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK1-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK1-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK1-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP14]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP17]], i32 0, i32 2 // CHECK1-NEXT: store i32 1111, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -709,67 +744,66 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr 
nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] 
= getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 
128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i32 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -797,18 +831,20 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] 
{ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 128 // CHECK1-NEXT: ret void // // @@ -818,7 +854,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -829,39 +865,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 4 
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[THIS1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 // CHECK1-NEXT: ret void // // @@ -873,7 +915,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -889,9 +931,9 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -945,9 +987,7 @@ // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C7:%.*]] = alloca ptr, align 4 // 
CHECK3-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -974,63 +1014,74 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK3-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK3-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK3-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK3-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1039,9 +1090,7 @@ // CHECK3-SAME: (ptr nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 
4 -// CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1061,81 +1110,95 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], i32 [[TMP14]], i32 [[TMP18]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// 
CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[TMP2]], align 128 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP7]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1185,34 +1248,40 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK4-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// 
CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: store 
i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[TMP2]], align 128 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK4-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK4-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1224,14 +1293,14 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 16 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 
[[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK4-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP6]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK4-NEXT: call void [[TMP9]](ptr [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -1258,9 +1327,7 @@ // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[E:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 @@ -1287,53 +1354,64 @@ // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C7]], align 4 // CHECK4-NEXT: [[E9:%.*]] = 
getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK4-NEXT: store ptr [[E9]], ptr [[E]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP9]], ptr [[TMP10]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 4 +// CHECK4-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 4 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[E3:%.*]] = alloca [4 x i32], align 4 -// CHECK4-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[E:%.*]] = alloca [4 x i32], align 4 +// CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, 
ptr, ptr, i32, ptr }>, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E3]], ptr align 4 [[TMP2]], i32 16, i1 false) -// CHECK4-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: store i32 
[[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[E]], ptr align 4 [[TMP11]], i32 16, i1 false) +// CHECK4-NEXT: store ptr [[E]], ptr [[_TMP3]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 4 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1345,19 +1423,19 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED5]], align 4 -// CHECK4-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK4-NEXT: store ptr [[TMP5]], ptr [[BLOCK_CAPTURED6]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK4-NEXT: call void [[TMP7]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 4 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK4-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void [[TMP16]](ptr [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -1366,9 +1444,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], 
align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -1387,55 +1463,65 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, i32, ptr }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], i32 [[TMP7]], i32 [[TMP9]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 4 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// 
CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// 
CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK4-NEXT: ret void // // @@ -1449,10 +1535,9 @@ // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN2SSC1ERi(ptr nonnull align 8 dereferenceable(32) [[SS]], ptr nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) @@ -1463,34 +1548,40 @@ // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float 2.000000e+00) // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr nonnull align 4 dereferenceable(4) [[VAR]], float 3.000000e+00) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[VEC]], i64 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR_CASTED1]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: [[CALL:%.*]] = call i32 @_Z5tmainIiET_v() // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP7]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP10]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -1530,66 +1621,70 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], 
ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull 
align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 
4, i1 false) +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -1637,21 +1732,26 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 
-// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP1]], i64 4, ptr inttoptr (i64 1 to ptr)) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 4, ptr inttoptr (i64 1 to ptr)) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTT_VAR__VOID_ADDR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTT_VAR__VOID_ADDR]], align 4 -// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP1]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i64 1 to ptr)) +// CHECK9-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTT_VAR__VOID_ADDR]], ptr inttoptr (i64 1 to ptr)) // CHECK9-NEXT: ret void // // @@ -1659,38 +1759,50 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: 
entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2]], align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done1: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP1]] +// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP6]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1702,9 +1814,7 @@ // CHECK9-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[C7:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1731,65 +1841,76 @@ // CHECK9-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK9-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK9-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK9-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 -// CHECK9-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 -// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 
0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK9-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP14]], -1 +// CHECK9-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 1 +// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP15]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[TMP17]], i64 0, i64 2 // CHECK9-NEXT: store i32 1111, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -1909,67 +2030,66 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias 
[[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 128 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x 
%struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr 
nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 
0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -1997,18 +2117,20 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias 
[[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 128 // CHECK9-NEXT: ret void // // @@ -2018,7 +2140,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -2029,39 +2151,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[A]], align 4 // CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], 
align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 // CHECK9-NEXT: ret void // // @@ -2073,7 +2201,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2089,9 +2217,9 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2145,9 +2273,7 @@ // CHECK11-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C7:%.*]] = alloca ptr, align 8 
// CHECK11-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2174,63 +2300,74 @@ // CHECK11-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK11-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK11-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK11-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 
8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK11-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK11-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[B_ADDR]], ptr [[TMP6]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[_TMP2]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK11-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[B]], ptr [[TMP15]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 // CHECK11-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2239,9 +2376,7 @@ // CHECK11-SAME: (ptr nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -2261,81 +2396,95 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK11-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], 
ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK11-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK11-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK11-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK11-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK11-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr 
[[G1]], ptr [[TMP2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK11-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK11-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[G]], ptr [[TMP6]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[SIVAR]], ptr [[TMP7]], align 8 // CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2385,34 +2534,40 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load 
i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK12-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i64 [[TMP1]]) +// CHECK12-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store ptr @g, ptr [[TMP0]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK12-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK12-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load volatile i32, ptr [[TMP2]], align 128 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK12-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK12-NEXT: store i32 2, ptr [[SIVAR]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2424,14 +2579,14 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, 
[92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK12-NEXT: store volatile i32 [[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 32 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK12-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK12-NEXT: [[TMP6:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK12-NEXT: store volatile i32 [[TMP6]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK12-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK12-NEXT: store i32 [[TMP7]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK12-NEXT: call void [[TMP9]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // @@ -2458,9 +2613,7 @@ // CHECK12-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[C7:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[E:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK12-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // 
CHECK12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2487,53 +2640,64 @@ // CHECK12-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 // CHECK12-NEXT: [[E9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 3 // CHECK12-NEXT: store ptr [[E9]], ptr [[E]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[E]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..2, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]], ptr [[TMP10]]) +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK12-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[E]], align 8 +// CHECK12-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]], ptr nonnull align 4 dereferenceable(16) [[E:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[E3:%.*]] = alloca [4 x i32], align 16 -// CHECK12-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[E:%.*]] = alloca [4 x i32], align 16 +// CHECK12-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], ptr [[B_ADDR]], 
align 8 -// CHECK12-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK12-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK12-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK12-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK12-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E3]], ptr align 4 [[TMP2]], i64 16, i1 false) -// CHECK12-NEXT: store ptr [[E3]], ptr [[_TMP4]], align 8 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK12-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK12-NEXT: store ptr [[TMP10]], 
ptr [[_TMP2]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK12-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[E]], ptr align 4 [[TMP11]], i64 16, i1 false) +// CHECK12-NEXT: store ptr [[E]], ptr [[_TMP3]], align 8 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2545,19 +2709,19 @@ // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK12-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED5]], align 8 -// CHECK12-NEXT: [[BLOCK_CAPTURED6:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK12-NEXT: store ptr 
[[TMP5]], ptr [[BLOCK_CAPTURED6]], align 8 -// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK12-NEXT: call void [[TMP7]](ptr [[BLOCK]]) +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK12-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK12-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK12-NEXT: store ptr [[TMP14]], ptr [[BLOCK_CAPTURED5]], align 8 +// CHECK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK12-NEXT: call void [[TMP16]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // @@ -2566,9 +2730,7 @@ // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr 
[[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2587,55 +2749,65 @@ // CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[TMP6]], align 8 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 8 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[BLOCK_CAPTURE_ADDR4]], align 8 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK12-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr 
inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[C_CASTED]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[THIS]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP12]]) +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[TMP11]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[THIS:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR2]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // 
CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK12-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK12-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK12-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK12-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK12-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK12-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK12-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK12-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK12-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP11]], -1 +// CHECK12-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP13]], 1 +// CHECK12-NEXT: store i32 [[DIV]], ptr [[TMP12]], align 4 // CHECK12-NEXT: ret void // // @@ -2649,6 +2821,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2665,63 +2838,74 @@ // CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 128 // CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR0]], align 8 // CHECK17-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// 
CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 8, ptr @.omp_outlined., ptr [[TMP8]], ptr [[N_ADDR]], i64 [[TMP1]], ptr [[TMP9]], ptr [[TMP10]], i64 [[TMP3]], i64 [[TMP5]], ptr [[VLA]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 
8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr 
[[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i64 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 [[TMP11]], ptr [[TMP12]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// 
CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 8 
+// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i64 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 [[TMP20]], ptr [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK17-NEXT: ret void // // @@ -2735,6 +2919,7 @@ // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2756,73 +2941,84 @@ // CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 // CHECK17-NEXT: store i32 [[TMP8]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i64 [[TMP1]], ptr [[TMP9]], ptr [[THIS1]], i64 [[TMP3]], i64 [[TMP5]], ptr [[VLA]], ptr [[N_ADDR]], ptr [[TMP10]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP11]]) +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[THIS1]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[VLA]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP19]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// 
CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 8 -// CHECK17-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 127 -// CHECK17-NEXT: [[TMP9:%.*]] = udiv i64 [[TMP8]], 128 -// CHECK17-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 128 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP12]], i64 [[TMP10]], ptr inttoptr (i64 8 to ptr)) -// CHECK17-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[DOTVLA2__VOID_ADDR]], ptr align 128 [[TMP4]], i64 [[TMP14]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP15]] to double -// CHECK17-NEXT: [[TMP16:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[DOTVLA2__VOID_ADDR]], i64 [[TMP16]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 8 +// CHECK17-NEXT: [[TMP17:%.*]] = add nuw i64 [[TMP16]], 127 +// CHECK17-NEXT: [[TMP18:%.*]] = udiv i64 [[TMP17]], 128 +// CHECK17-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 128 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK17-NEXT: [[DOTVLA2__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP21]], i64 [[TMP19]], ptr inttoptr (i64 8 to ptr)) +// 
CHECK17-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[DOTVLA2__VOID_ADDR]], ptr align 128 [[TMP11]], i64 [[TMP23]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double +// CHECK17-NEXT: [[TMP25:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[DOTVLA2__VOID_ADDR]], i64 [[TMP25]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX7]], align 8 -// CHECK17-NEXT: [[CONV8:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B9]], align 4 -// CHECK17-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP18]], i64 [[IDXPROM10]] -// CHECK17-NEXT: store x86_fp80 [[CONV8]], ptr [[ARRAYIDX11]], align 16 -// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP12]], ptr [[DOTVLA2__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// 
CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK17-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP27]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store x86_fp80 [[CONV2]], ptr [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP21]], ptr [[DOTVLA2__VOID_ADDR]], ptr inttoptr (i64 8 to ptr)) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -245,24 +245,25 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -276,90 +277,94 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK1-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK1-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK1-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK1-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 
[[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// 
CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK1-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK1-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -372,23 +377,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -398,79 +409,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // 
CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 
[[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -481,23 +494,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -507,79 +526,81 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423 // 
CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], 
align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], 
i64 [[IDXPROM2]] +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK1-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -590,23 +611,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -616,96 +643,98 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 
[[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] 
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK1-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 
[[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -716,23 +745,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -742,65 +777,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// 
CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr 
[[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 
[[TMP26]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -818,23 +855,29 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -844,65 +887,67 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// 
CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK1-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr 
[[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 
[[TMP26]] // CHECK1-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -922,26 +967,33 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[X]], align 4 // CHECK1-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -957,21 +1009,23 @@ // CHECK1-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -979,85 +1033,85 @@ // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK1-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK1-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], 
[[TMP24]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK1-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK1-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK1-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK1-NEXT: store i32 
[[CONV20]], ptr [[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], 
align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK1-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// 
CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK1-NEXT: store i64 [[ADD29]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1078,24 +1132,30 @@ // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1107,78 +1167,80 @@ // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], 
i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: 
[[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 
[[IDXPROM9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK1-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1203,7 +1265,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -1212,23 +1274,26 @@ // CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], 
align 16 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1240,106 +1305,110 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK1: omp.dispatch.cleanup: // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK1: 
omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK1-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK1-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK1-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK1-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: store 
i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP30:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: 
[[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -1355,24 +1424,25 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK2-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -1386,90 +1456,94 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK2-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK2-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[SUB:%.*]] 
= fsub double [[TMP1]], 1.000000e+00 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK2-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK2-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK2-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK2-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK2-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK2-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ult i64 
[[TMP12]], [[ADD7]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK2-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK2-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: 
[[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK2-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK2-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK2-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1482,23 +1556,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: 
[[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1508,79 +1588,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1591,23 +1673,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1617,79 +1705,81 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK2-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK2-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1700,23 +1790,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1726,96 +1822,98 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 
[[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] 
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK2-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 
[[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK2-NEXT: ret void // // @@ -1826,23 +1924,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1852,65 +1956,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 35, i64 0, i64 16908287, i64 1, i64 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 35, i64 0, i64 16908287, i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] 
= load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: 
store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -1928,23 +2034,29 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1954,65 +2066,67 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK2-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 36, i64 0, i64 16908287, i64 1, i64 7) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 36, i64 0, i64 16908287, i64 1, i64 7) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] 
= load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK2-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, 
!llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]] // CHECK2-NEXT: 
store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK2-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2032,26 +2146,33 @@ // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, ptr [[X]], align 4 // CHECK2-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2067,21 +2188,23 @@ // CHECK2-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK2-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK2-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -2089,85 +2212,85 @@ // CHECK2-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK2-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK2-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 38, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK2-NEXT: call void 
@__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 38, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]] 
// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK2-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK2-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK2-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK2-NEXT: store i32 [[CONV20]], ptr 
[[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK2-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK2-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK2-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK2-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK2-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK2-NEXT: 
[[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK2-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK2-NEXT: store i64 [[ADD29]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2188,24 +2311,30 @@ // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2217,78 +2346,80 @@ // CHECK2-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 37, i32 0, i32 199, i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 37, i32 0, i32 199, i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP10]], 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK2-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK2-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM9:%.*]] = 
zext i8 [[TMP20]] to i64 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]] +// 
CHECK2-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK2-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2313,7 +2444,7 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -2322,23 +2453,26 @@ // CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16 // 
CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2350,106 +2484,110 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK2-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK2-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK2: omp.dispatch.cleanup: // CHECK2-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK2: 
omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK2-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK2-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK2-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK2-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK2-NEXT: store 
i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK2-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP30:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: 
[[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]] // CHECK2-NEXT: unreachable // // @@ -2465,24 +2603,25 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK5-NEXT: store double 5.000000e+00, ptr [[A]], align 8, !dbg [[DBG10:![0-9]+]] // CHECK5-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8, !dbg [[DBG11:![0-9]+]] // CHECK5-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8, !dbg [[DBG11]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !dbg [[DBG11]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !dbg [[DBG11]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]), !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG11]] // CHECK5-NEXT: ret void, !dbg [[DBG12:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -2496,91 +2635,95 @@ // CHECK5-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8, !dbg [[DBG14:![0-9]+]] -// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]], !dbg [[DBG14]] -// CHECK5-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 
1, ptr [[I]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG14:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8, !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]], !dbg [[DBG15]] +// CHECK5-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, ptr [[I]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.precond.then: -// CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg 
[[DBG14]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG15]] -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]), !dbg [[DBG15]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG14]] +// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg 
[[DBG14]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG15]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG15]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG15]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1, 
!dbg [[DBG14]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 
[[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG16:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG15]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], 
align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG15]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]]), !dbg [[DBG15]] -// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB3:[0-9]+]], i32 [[TMP26]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG14]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void, !dbg [[DBG16]] // @@ -2592,23 +2735,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG22]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG22]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG22]] // CHECK5-NEXT: ret void, !dbg [[DBG23:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2618,79 +2767,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, 
!dbg [[DBG25]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG25]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26:![0-9]+]] // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg 
[[DBG26]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB6:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG26]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG26]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG26]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG26]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG26]] 
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG26]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG26]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]], !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG27:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: 
[[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]], 
!dbg [[DBG27]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG27]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !dbg [[DBG27]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG28:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG26]] // CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG25]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG25]] // CHECK5-NEXT: ret void, !dbg [[DBG28]] // // @@ -2701,23 +2852,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB14:[0-9]+]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG31]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG31]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB14:[0-9]+]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG31]] // CHECK5-NEXT: ret void, !dbg [[DBG32:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2727,79 +2884,81 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, 
!dbg [[DBG34]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG34]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35:![0-9]+]] // CHECK5-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB11:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg 
[[DBG35]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB11:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 4571423, !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG35]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG35]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG35]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG35]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg 
[[DBG35]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG35]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7, !dbg [[DBG35]] // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]], !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG36:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// 
CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG36]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG36]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG36:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM2]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 
[[IDXPROM5]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]], !dbg [[DBG36]] // CHECK5-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4, !dbg [[DBG36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG37:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG35]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG34]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG34]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB13:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB13:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG34]] // CHECK5-NEXT: ret void, !dbg [[DBG37]] // // @@ -2810,23 +2969,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB19:[0-9]+]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG40]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG40]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB19:[0-9]+]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG40]] // CHECK5-NEXT: ret void, !dbg [[DBG41:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2836,96 +3001,98 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, 
!dbg [[DBG43]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG43]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44:![0-9]+]] // CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB16:[0-9]+]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], 
align 4, !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB16:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288, !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG44]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG44]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG44]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG44]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg 
[[DBG44]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]], !dbg [[DBG44]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127, !dbg [[DBG44]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP16:%.*]] = 
load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG45]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG45:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]], !dbg [[DBG45]] // CHECK5-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4, !dbg [[DBG45]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1, !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]], !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]], !dbg [[DBG44]] // CHECK5-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG44]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB18:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG43]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB18:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG43]] // CHECK5-NEXT: ret void, !dbg [[DBG46]] // // @@ -2936,23 +3103,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB21:[0-9]+]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG50]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG50]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB21:[0-9]+]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG50]] // CHECK5-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -2962,65 +3135,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, 
!dbg [[DBG53]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG53]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54:![0-9]+]] // CHECK5-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG54]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG53]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB21]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG53]] +// CHECK5-NEXT: call void 
@__kmpc_dispatch_init_8u(ptr @[[GLOB21]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB21]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG53]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB21]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG53]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG53]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54]] -// CHECK5-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG54]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: 
[[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG54]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG54]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr 
[[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = 
getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]], !dbg [[DBG56]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG54]] // CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG53]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3038,23 +3213,29 @@ // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // 
CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23:[0-9]+]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG62]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG62]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB23:[0-9]+]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG62]] // CHECK5-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3064,65 +3245,67 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, 
!dbg [[DBG65]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG65]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66:![0-9]+]] // CHECK5-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG65]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB23]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG65]] +// CHECK5-NEXT: call void 
@__kmpc_dispatch_init_8u(ptr @[[GLOB23]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB23]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG65]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB23]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG65]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG65]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66]] -// CHECK5-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG66]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: 
[[ADD:%.*]] = add i64 [[TMP14]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]], !dbg [[DBG66]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127, !dbg [[DBG66]] // CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr 
[[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = 
getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[TMP26]], !dbg [[DBG68]] // CHECK5-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1, !dbg [[DBG66]] // CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group [[ACC_GRP67]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG65]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3142,26 +3325,33 @@ // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: 
store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[X]], align 4, !dbg [[DBG74:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[Y]], align 4, !dbg [[DBG75:![0-9]+]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB25:[0-9]+]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4, !dbg [[DBG76]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8, !dbg [[DBG76]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB25:[0-9]+]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG76]] // CHECK5-NEXT: ret void, !dbg [[DBG77:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3177,21 +3367,23 @@ // CHECK5-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], 
align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG80:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8, !dbg [[DBG80]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] 
-// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]], !dbg [[DBG80]] // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1, !dbg [[DBG80]] @@ -3199,85 +3391,85 @@ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11, !dbg [[DBG81:![0-9]+]] // CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1, !dbg [[DBG81]] // CHECK5-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: store i8 [[TMP7]], ptr [[I]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[TMP13]], ptr [[I]], align 1, !dbg [[DBG80]] // CHECK5-NEXT: store i32 11, ptr [[X]], align 4, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32, !dbg [[DBG80]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57, !dbg [[DBG80]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP15:%.*]] = 
load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] -// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB25]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1), !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB25]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1), !dbg [[DBG79]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB25]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG79]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB25]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG79]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0, !dbg [[DBG79]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82:![0-9]+]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]], !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], [[TMP24]], !dbg [[DBG80]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, 
!dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1, !dbg [[DBG80]] // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]], !dbg [[DBG80]] // CHECK5-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8, !dbg [[DBG80]] // CHECK5-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11, !dbg [[DBG80]] // CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11, !dbg [[DBG80]] -// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]], !dbg [[DBG80]] +// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]], !dbg [[DBG80]] // CHECK5-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1, !dbg [[DBG81]] // CHECK5-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]], !dbg [[DBG81]] // CHECK5-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32, !dbg [[DBG81]] // CHECK5-NEXT: store i32 [[CONV20]], ptr [[X8]], align 4, !dbg [[DBG81]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP24:%.*]] 
= load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]], !dbg [[DBG83]] -// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] -// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]], !dbg [[DBG83]] +// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = 
getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]], !dbg [[DBG83]] // CHECK5-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !dbg [[DBG83]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG84:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] -// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] +// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1, !dbg [[DBG80]] // CHECK5-NEXT: store i64 [[ADD29]], ptr [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group [[ACC_GRP82]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG79]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3298,24 +3490,30 @@ // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[X]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB27:[0-9]+]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]), !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG90]] +// CHECK5-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG90]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB27:[0-9]+]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG90]] // CHECK5-NEXT: ret void, !dbg [[DBG91:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3327,78 +3525,80 @@ // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg 
[[DBG93]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG93]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94:![0-9]+]] // CHECK5-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB27]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG93]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr 
@[[GLOB27]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB27]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG93]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB27]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]), !dbg [[DBG93]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0, !dbg [[DBG93]] // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94]] -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG94]] // CHECK5-NEXT: br i1 [[CMP]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1, !dbg [[DBG94]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]], !dbg [[DBG94]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8, !dbg [[DBG94]] // CHECK5-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20, !dbg [[DBG94]] // CHECK5-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20, !dbg [[DBG94]] -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]], !dbg [[DBG94]] +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]], !dbg [[DBG94]] // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1, !dbg [[DBG96:![0-9]+]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]], !dbg [[DBG96]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !dbg [[DBG96]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], 
!llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]] -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]] 
-// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 [[IDXPROM9]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG97]], 
!llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]], !dbg [[DBG97]] // CHECK5-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG94]] // CHECK5-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group [[ACC_GRP95]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3423,7 +3623,7 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] @@ -3432,23 +3632,26 @@ // CHECK5-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG105]] // CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16, 
!dbg [[DBG105]] // CHECK5-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG105]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4, !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8, !dbg [[DBG106]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB32:[0-9]+]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]), !dbg [[DBG106]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG107:![0-9]+]] -// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]), !dbg [[DBG107]] -// CHECK5-NEXT: ret void, !dbg [[DBG107]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG106:![0-9]+]] +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG106]] +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG107:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB32:[0-9]+]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]), !dbg [[DBG108]] +// CHECK5-NEXT: ret void, !dbg [[DBG108]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 !dbg [[DBG108:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] personality ptr @__gxx_personality_v0 !dbg [[DBG109:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3460,107 +3663,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] -// 
CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG109]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG109]] -// CHECK5-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB29:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG109]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 
[[TMP6]], ptr [[N]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave(), !dbg [[DBG110]] +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16, !dbg [[DBG110]] +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB29:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG110]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288, !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288, !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG111]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG111]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label 
[[COND_END]], !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG111]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.cleanup: -// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg 
[[DBG110]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]], !dbg [[DBG110]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]], !dbg [[DBG111]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.cond.cleanup: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[CALL:%.*]] = invoke noundef i32 @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG111:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG112:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]] -// 
CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]] -// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]], !dbg [[DBG111]] -// CHECK5-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4, !dbg [[DBG111]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG111]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 
[[TMP20]] to float, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64, !dbg [[DBG112]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]], !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]], !dbg [[DBG112]] +// CHECK5-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4, !dbg [[DBG112]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG112]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1, !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP112:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP113:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG110]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]], !dbg [[DBG110]] -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP113:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]], !dbg [[DBG111]] +// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG110]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB31:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]), !dbg [[DBG109]] -// CHECK5-NEXT: ret void, !dbg [[DBG111]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB31:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG110]] +// CHECK5-NEXT: call void 
@llvm.stackrestore(ptr [[TMP29]]), !dbg [[DBG110]] +// CHECK5-NEXT: ret void, !dbg [[DBG112]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP25:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP26:%.*]] = extractvalue { ptr, i32 } [[TMP25]], 0, !dbg [[DBG111]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP26]]) #[[ATTR7:[0-9]+]], !dbg [[DBG111]] -// CHECK5-NEXT: unreachable, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP30:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP31:%.*]] = extractvalue { ptr, i32 } [[TMP30]], 0, !dbg [[DBG112]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP31]]) #[[ATTR7:[0-9]+]], !dbg [[DBG112]] +// CHECK5-NEXT: unreachable, !dbg [[DBG112]] // // // CHECK5-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -3575,24 +3782,25 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK6-NEXT: store double 5.000000e+00, ptr [[A]], align 8 // CHECK6-NEXT: [[TMP0:%.*]] = load double, ptr [[A]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP2]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca double, align 8 @@ -3606,90 +3814,94 @@ // CHECK6-NEXT: [[I4:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load double, ptr undef, align 8 -// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK6-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8 +// CHECK6-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]] // CHECK6-NEXT: store double [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00 +// CHECK6-NEXT: [[TMP4:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[SUB:%.*]] = fsub double [[TMP4]], 1.000000e+00 // CHECK6-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00 // CHECK6-NEXT: [[CONV:%.*]] = fptoui double [[DIV]] to i64 // CHECK6-NEXT: [[SUB3:%.*]] = sub i64 [[CONV]], 1 // CHECK6-NEXT: store i64 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: store i64 1, ptr [[I]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 -// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]] +// CHECK6-NEXT: [[TMP5:%.*]] = load double, ptr [[DOTCAPTURE_EXPR_1]], align 8 +// CHECK6-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP5]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP3]], ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i64 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP7]] to i64 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV5]]) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp ugt i64 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ 
[[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK6-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP13]], 1 -// CHECK6-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP12]], [[ADD7]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 +// CHECK6-NEXT: [[CMP8:%.*]] = icmp ult i64 [[TMP15]], [[ADD7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP15]], 1 -// CHECK6-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP14]], [[ADD9]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP18]], 1 +// CHECK6-NEXT: [[CMP10:%.*]] = icmp ult i64 [[TMP17]], [[ADD9]] // CHECK6-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1 +// 
CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP19]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add i64 1, [[MUL]] // CHECK6-NEXT: store i64 [[ADD11]], ptr [[I4]], align 8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[TMP17]], 1 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[TMP20]], 1 // CHECK6-NEXT: store i64 [[ADD12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP18]], [[TMP19]] +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD13:%.*]] = add i64 [[TMP21]], [[TMP22]] // CHECK6-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP20]], [[TMP21]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK6-NEXT: [[ADD14:%.*]] = add i64 [[TMP23]], [[TMP24]] // CHECK6-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP23:%.*]] = 
load i32, ptr [[TMP22]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void @@ -3702,23 +3914,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3728,79 +3946,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3811,23 +4031,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..2, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3837,79 +4063,81 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 4571423, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
4571423 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 7 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK6-NEXT: store i32 [[SUB]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 -// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]] -// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 -// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr 
[[TMP20]], i64 [[IDXPROM2]] +// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP19]], [[TMP22]] +// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM5]] +// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP25]] +// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM8]] // CHECK6-NEXT: store float [[MUL7]], ptr [[ARRAYIDX9]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -3920,23 +4148,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3946,96 +4180,98 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP11]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 
[[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP18]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[IDXPROM3]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM9]] 
+// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM3]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4 +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP21]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP26]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP27:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP27]] +// CHECK6-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM9]] // CHECK6-NEXT: store float [[MUL8]], ptr [[ARRAYIDX10]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 +// CHECK6-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP30]], 1 // CHECK6-NEXT: store i32 
[[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]] +// CHECK6-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP31]], [[TMP32]] // CHECK6-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]] +// CHECK6-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD13:%.*]] = add i32 [[TMP33]], [[TMP34]] // CHECK6-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK6-NEXT: ret void // // @@ -4046,23 +4282,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4072,65 +4314,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// 
CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr 
[[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 
[[TMP26]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4148,23 +4392,29 @@ // CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4174,65 +4424,67 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK6-NEXT: store i64 16908287, ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_8u(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_8u(ptr @[[GLOB2]], i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// 
CHECK6-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP14]], 1 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP13]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP15]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] // CHECK6-NEXT: store i64 [[ADD1]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr 
[[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 [[TMP21]] +// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[TMP17]] +// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[TMP20]] +// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX2]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP18]], [[TMP21]] +// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load float, ptr [[ARRAYIDX4]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP24]] +// CHECK6-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[I]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 
[[TMP26]] // CHECK6-NEXT: store float [[MUL5]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP27]], 1 // CHECK6-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4252,26 +4504,33 @@ // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, ptr [[X]], align 4 // CHECK6-NEXT: store i32 0, ptr [[Y]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..6, ptr [[Y]], ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4287,21 +4546,23 @@ // CHECK6-NEXT: [[X8:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK6-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[TMP11]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32 +// CHECK6-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i8 [[TMP12]] to i32 // CHECK6-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]] // CHECK6-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1 // CHECK6-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 @@ -4309,85 +4570,85 @@ // CHECK6-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[CONV4]], 11 // CHECK6-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK6-NEXT: store i64 [[SUB5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: store i8 [[TMP7]], ptr [[I]], align 1 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: store i8 [[TMP13]], ptr [[I]], align 1 // CHECK6-NEXT: store i32 11, ptr [[X]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP14]] to i32 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57 // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK6: omp.precond.then: // CHECK6-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1) +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK6-NEXT: call void 
@__kmpc_dispatch_init_8(ptr @[[GLOB2]], i32 [[TMP18]], i32 1073741862, i64 0, i64 [[TMP16]], i64 1, i64 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_dispatch_next_8(ptr @[[GLOB2]], i32 [[TMP20]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP21]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK6-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK6-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK6-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP23]], 
[[TMP24]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP26]], 11 // CHECK6-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK6-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 // CHECK6-NEXT: store i8 [[CONV14]], ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 +// CHECK6-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP28]], 11 // CHECK6-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 -// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] +// CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP27]], [[MUL16]] // CHECK6-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK6-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK6-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 // CHECK6-NEXT: store i32 
[[CONV20]], ptr [[X8]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP24:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP23]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP25:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[TMP27:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 -// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 [[IDXPROM21]] -// CHECK6-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: [[TMP30:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 -// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM24]] -// CHECK6-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK6-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP30]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP31:%.*]] = load float, ptr [[ARRAYIDX]], 
align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: [[TMP33:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] -// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 -// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM27]] +// CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP33]] to i64 +// CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, ptr [[TMP32]], i64 [[IDXPROM21]] +// CHECK6-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX22]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP31]], [[TMP34]] +// CHECK6-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP36:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP36]] to i64 +// CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, ptr [[TMP35]], i64 [[IDXPROM24]] +// CHECK6-NEXT: [[TMP37:%.*]] = load float, ptr [[ARRAYIDX25]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP37]] +// CHECK6-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[TMP39:%.*]] = load i8, ptr [[I7]], align 1, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP39]] to i64 +// CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, ptr [[TMP38]], i64 [[IDXPROM27]] // CHECK6-NEXT: store float [[MUL26]], ptr [[ARRAYIDX28]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] -// 
CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 +// CHECK6-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK6-NEXT: store i64 [[ADD29]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP11]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4408,24 +4669,30 @@ // CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK6-NEXT: store i32 0, ptr [[X]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..7, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK6-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK6-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4437,78 +4704,80 @@ // CHECK6-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], 
align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 199, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 1073741861, i32 0, i32 199, i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], 
i32 [[TMP10]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 20 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 // CHECK6-NEXT: store i8 [[CONV]], ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// 
CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP17]], 20 // CHECK6-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], [[MUL3]] // CHECK6-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] // CHECK6-NEXT: store i32 [[ADD5]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP1]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP14:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP17:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 -// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP20:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: 
[[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 -// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 [[IDXPROM9]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP0]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[TMP23:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 -// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 [[IDXPROM12]] +// CHECK6-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP19:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP19]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP20]], [[TMP23]] +// CHECK6-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP25:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP25]] to i64 +// CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 
[[IDXPROM9]] +// CHECK6-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP26]] +// CHECK6-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[TMP28:%.*]] = load i8, ptr [[I]], align 1, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP28]] to i64 +// CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, ptr [[TMP27]], i64 [[IDXPROM12]] // CHECK6-NEXT: store float [[MUL11]], ptr [[ARRAYIDX13]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK6-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK6-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4533,7 +4802,7 @@ // CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK6-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -4542,23 +4811,26 @@ // CHECK6-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 // CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], 
align 16 // CHECK6-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..8, ptr [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]) -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP5]]) +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP7]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[N:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4570,98 +4842,102 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK6-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 16908288, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave() -// CHECK6-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16 -// CHECK6-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) +// CHECK6-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() +// CHECK6-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP4]], align 16 +// CHECK6-NEXT: store i64 [[TMP4]], ptr [[__VLA_EXPR0]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 5) // CHECK6-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP10]], 16908288 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]] -// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP13]], [[TMP14]] +// CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] // CHECK6: omp.dispatch.cleanup: // CHECK6-NEXT: br label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP15]], [[TMP16]] +// CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP17]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3foov() // CHECK6-NEXT: [[CONV:%.*]] = sitofp i32 [[CALL]] to float -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA1]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[ADD4:%.*]] = fadd float [[CONV]], [[TMP14]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP15]] to float -// CHECK6-NEXT: [[ADD6:%.*]] = fadd float [[ADD4]], [[CONV5]] -// CHECK6-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK6-NEXT: [[IDXPROM7:%.*]] = zext i32 [[TMP17]] to i64 -// 
CHECK6-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM7]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX8]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = fadd float [[TMP18]], [[ADD6]] -// CHECK6-NEXT: store float [[ADD9]], ptr [[ARRAYIDX8]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VLA]], i64 [[IDXPROM]] +// CHECK6-NEXT: [[TMP19:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[ADD3:%.*]] = fadd float [[CONV]], [[TMP19]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[N]], align 4 +// CHECK6-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP20]] to float +// CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[ADD3]], [[CONV4]] +// CHECK6-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP21]], i64 [[IDXPROM6]] +// CHECK6-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = fadd float [[TMP23]], [[ADD5]] +// CHECK6-NEXT: store float [[ADD8]], ptr [[ARRAYIDX7]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add i32 [[TMP24]], 1 +// CHECK6-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // 
CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP22]], [[TMP23]] -// CHECK6-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP25]], [[TMP26]] +// CHECK6-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP27]], [[TMP28]] +// CHECK6-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) +// CHECK6-NEXT: [[TMP29:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK6-NEXT: call void @llvm.stackrestore(ptr [[TMP29]]) // CHECK6-NEXT: ret void // // @@ -4780,17 +5056,20 @@ // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK11-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARR]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[ARR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[__RANGE1:%.*]] = alloca ptr, align 8 @@ -4807,22 +5086,24 @@ // CHECK11-NEXT: [[A:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: store ptr [[ARRAYDECAY1]], ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP4]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP5]] to i64 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP7]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // 
CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 @@ -4830,65 +5111,65 @@ // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 // CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i64 [[DIV]], 1 // CHECK11-NEXT: store i64 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_2]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP9]], [[TMP10]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// 
CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 
-// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP20]], 1 -// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[MUL]] +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP22]], 1 +// CHECK11-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[MUL]] // CHECK11-NEXT: store ptr [[ADD_PTR8]], ptr [[__BEGIN15]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[__BEGIN15]], align 8 -// CHECK11-NEXT: store ptr [[TMP21]], ptr [[A]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[__BEGIN15]], align 8 +// CHECK11-NEXT: store ptr [[TMP23]], ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[A]], align 8 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP23]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP25]], 1 // CHECK11-NEXT: store i64 [[ADD9]], ptr 
[[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4898,17 +5179,20 @@ // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARR:%.*]] = alloca [10 x i32], align 16 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ARR]], i8 0, i64 40, i1 false) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[ARR]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[ARR]], ptr [[TMP0]], align 8 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[ARR:%.*]]) #[[ATTR5]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -4933,41 +5217,43 @@ // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP1]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR]], ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP0]], ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 0 // CHECK11-NEXT: [[ADD_PTR3:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY2]], i64 10 // CHECK11-NEXT: store ptr [[ADD_PTR3]], ptr [[__END2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[__RANGE1]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP3]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__RANGE1]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP5]], i64 0, i64 0 // CHECK11-NEXT: store ptr [[ARRAYDECAY4]], ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__END1]], align 8 -// CHECK11-NEXT: store ptr [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[__RANGE2]], align 8 -// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP5]], i64 0, i64 0 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[__END1]], align 8 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[__RANGE2]], align 8 +// CHECK11-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP7]], i64 0, i64 0 // CHECK11-NEXT: store ptr [[ARRAYDECAY7]], ptr 
[[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[__END2]], align 8 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__END2]], align 8 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[TMP10]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] // CHECK11-NEXT: [[SUB_PTR_DIV:%.*]] = sdiv exact i64 [[SUB_PTR_SUB]], 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i64 [[SUB_PTR_DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint ptr [[TMP9]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST10:%.*]] = ptrtoint ptr [[TMP11]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST11:%.*]] = ptrtoint ptr [[TMP12]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB12:%.*]] = sub i64 [[SUB_PTR_LHS_CAST10]], [[SUB_PTR_RHS_CAST11]] // CHECK11-NEXT: 
[[SUB_PTR_DIV13:%.*]] = sdiv exact i64 [[SUB_PTR_SUB12]], 4 // CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i64 [[SUB_PTR_DIV13]], 1 @@ -4976,84 +5262,84 @@ // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[DIV]], [[DIV16]] // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB17]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: store ptr [[TMP11]], ptr [[__BEGIN1]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: store ptr [[TMP12]], ptr [[__BEGIN2]], align 8 // CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP13]], [[TMP14]] +// CHECK11-NEXT: store ptr [[TMP13]], ptr [[__BEGIN1]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[__BEGIN2]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ult ptr [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult ptr [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[CMP18:%.*]] = icmp ult ptr [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP18]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store 
i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: store i64 [[TMP19]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[CMP21:%.*]] = icmp sgt i64 [[TMP22]], [[TMP23]] // CHECK11-NEXT: br i1 [[CMP21]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = 
load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP26]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP22:%.*]] = icmp sle i64 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint ptr [[TMP29]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint ptr [[TMP30]] to i64 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = 
load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST23:%.*]] = ptrtoint ptr [[TMP31]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST24:%.*]] = ptrtoint ptr [[TMP32]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB25:%.*]] = sub i64 [[SUB_PTR_LHS_CAST23]], [[SUB_PTR_RHS_CAST24]] // CHECK11-NEXT: [[SUB_PTR_DIV26:%.*]] = sdiv exact i64 [[SUB_PTR_SUB25]], 4 // CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i64 [[SUB_PTR_DIV26]], 1 // CHECK11-NEXT: [[ADD28:%.*]] = add nsw i64 [[SUB27]], 1 // CHECK11-NEXT: [[DIV29:%.*]] = sdiv i64 [[ADD28]], 1 // CHECK11-NEXT: [[MUL30:%.*]] = mul nsw i64 1, [[DIV29]] -// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP28]], [[MUL30]] +// CHECK11-NEXT: [[DIV31:%.*]] = sdiv i64 [[TMP30]], [[MUL30]] // CHECK11-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV31]], 1 -// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 [[MUL32]] +// CHECK11-NEXT: [[ADD_PTR33:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i64 [[MUL32]] // CHECK11-NEXT: store ptr [[ADD_PTR33]], ptr [[__BEGIN119]], align 8 -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint ptr [[TMP34]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint ptr [[TMP35]] to i64 +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: 
[[SUB_PTR_LHS_CAST34:%.*]] = ptrtoint ptr [[TMP36]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST35:%.*]] = ptrtoint ptr [[TMP37]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB36:%.*]] = sub i64 [[SUB_PTR_LHS_CAST34]], [[SUB_PTR_RHS_CAST35]] // CHECK11-NEXT: [[SUB_PTR_DIV37:%.*]] = sdiv exact i64 [[SUB_PTR_SUB36]], 4 // CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i64 [[SUB_PTR_DIV37]], 1 // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[SUB38]], 1 // CHECK11-NEXT: [[DIV40:%.*]] = sdiv i64 [[ADD39]], 1 // CHECK11-NEXT: [[MUL41:%.*]] = mul nsw i64 1, [[DIV40]] -// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP33]], [[MUL41]] -// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 -// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint ptr [[TMP36]] to i64 -// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint ptr [[TMP37]] to i64 +// CHECK11-NEXT: [[DIV42:%.*]] = sdiv i64 [[TMP35]], [[MUL41]] +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK11-NEXT: [[SUB_PTR_LHS_CAST43:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK11-NEXT: [[SUB_PTR_RHS_CAST44:%.*]] = ptrtoint ptr [[TMP39]] to i64 // CHECK11-NEXT: [[SUB_PTR_SUB45:%.*]] = sub i64 [[SUB_PTR_LHS_CAST43]], [[SUB_PTR_RHS_CAST44]] // CHECK11-NEXT: [[SUB_PTR_DIV46:%.*]] = sdiv exact i64 [[SUB_PTR_SUB45]], 4 // CHECK11-NEXT: [[SUB47:%.*]] = sub nsw i64 [[SUB_PTR_DIV46]], 1 @@ -5061,32 +5347,32 @@ // CHECK11-NEXT: [[DIV49:%.*]] = sdiv i64 [[ADD48]], 1 // CHECK11-NEXT: [[MUL50:%.*]] = mul nsw i64 1, [[DIV49]] // CHECK11-NEXT: [[MUL51:%.*]] = mul nsw i64 [[DIV42]], [[MUL50]] -// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP32]], [[MUL51]] +// CHECK11-NEXT: [[SUB52:%.*]] = sub nsw i64 [[TMP34]], [[MUL51]] // CHECK11-NEXT: [[MUL53:%.*]] = mul nsw i64 [[SUB52]], 1 -// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, 
ptr [[TMP31]], i64 [[MUL53]] +// CHECK11-NEXT: [[ADD_PTR54:%.*]] = getelementptr inbounds i32, ptr [[TMP33]], i64 [[MUL53]] // CHECK11-NEXT: store ptr [[ADD_PTR54]], ptr [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[__BEGIN119]], align 8 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[A]], align 8 -// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[__BEGIN220]], align 8 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 -// CHECK11-NEXT: store i32 [[TMP40]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[__BEGIN119]], align 8 +// CHECK11-NEXT: store ptr [[TMP40]], ptr [[A]], align 8 +// CHECK11-NEXT: [[TMP41:%.*]] = load ptr, ptr [[__BEGIN220]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK11-NEXT: store i32 [[TMP43]], ptr [[TMP44]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 [[TMP43]], 1 +// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD55:%.*]] = add nsw i64 [[TMP45]], 1 // CHECK11-NEXT: store i64 [[ADD55]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr 
[[TMP44]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp --- a/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp +++ b/clang/test/OpenMP/parallel_for_lastprivate_conditional.cpp @@ -38,128 +38,143 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[A]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A1:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 1 -// CHECK1-NEXT: store i8 0, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 1 +// CHECK1-NEXT: store i8 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br 
label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP11]], 5 -// CHECK1-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP13]], 5 +// CHECK1-NEXT: br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: 
if.then: -// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK1-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK1-NEXT: store i32 0, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp sle i32 [[TMP15]], [[TMP14]] +// CHECK1-NEXT: br i1 [[TMP16]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK1: lp_cond_then: -// CHECK1-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr @{{pl_cond[.].+[.|,]}} align 4 // CHECK1-NEXT: br label [[LP_COND_EXIT]] // CHECK1: lp_cond_exit: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[TMP2]], ptr [[I]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp sle i32 [[TMP17]], [[TMP16]] -// CHECK1-NEXT: br i1 [[TMP18]], label [[LP_COND_THEN4:%.*]], label [[LP_COND_EXIT5:%.*]] -// CHECK1: lp_cond_then4: -// CHECK1-NEXT: store i32 [[TMP16]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT5]] -// CHECK1: lp_cond_exit5: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp sle i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: br i1 [[TMP24]], label [[LP_COND_THEN6:%.*]], label [[LP_COND_EXIT7:%.*]] -// CHECK1: lp_cond_then6: -// CHECK1-NEXT: store i32 [[TMP22]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: br label [[LP_COND_EXIT7]] -// CHECK1: lp_cond_exit7: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// 
CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP4]], i32 10) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, ptr [[TMP2]], ptr [[I]]) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i8 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP6]], i32 10) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp sle i32 [[TMP21]], [[TMP20]] +// CHECK1-NEXT: br i1 [[TMP22]], label [[LP_COND_THEN3:%.*]], label [[LP_COND_EXIT4:%.*]] +// CHECK1: lp_cond_then3: +// CHECK1-NEXT: store i32 [[TMP20]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: br label [[LP_COND_EXIT4]] +// CHECK1: lp_cond_exit4: +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP24]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp sle i32 [[TMP27]], [[TMP26]] +// CHECK1-NEXT: br i1 [[TMP28]], label [[LP_COND_THEN5:%.*]], label [[LP_COND_EXIT6:%.*]] +// CHECK1: lp_cond_then5: +// CHECK1-NEXT: store i32 [[TMP26]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: br label [[LP_COND_EXIT6]] +// CHECK1: lp_cond_exit6: +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// 
CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB2]], i32 [[TMP6]], i32 10) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_7]]) +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[A]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i8 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[LPC_THEN:%.*]], label [[LPC_DONE:%.*]] // CHECK1: lpc.then: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = icmp sle i32 [[TMP30]], [[TMP29]] -// CHECK1-NEXT: br i1 [[TMP31]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp sle i32 [[TMP36]], [[TMP35]] +// CHECK1-NEXT: br i1 [[TMP37]], label [[LP_COND_THEN8:%.*]], label [[LP_COND_EXIT9:%.*]] // CHECK1: lp_cond_then8: -// CHECK1-NEXT: store i32 [[TMP29]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP32]], ptr 
@{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP35]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr @{{pl_cond[.].+[.|,]}} align 4 // CHECK1-NEXT: br label [[LP_COND_EXIT9]] // CHECK1: lp_cond_exit9: -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK1-NEXT: br label [[LPC_DONE]] // CHECK1: lpc.done: // CHECK1-NEXT: br label [[IF_END]] @@ -168,67 +183,68 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP39]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP4]]) -// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP6]]) +// CHECK1-NEXT: br i1 
[[TMP41]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr @{{pl_cond[.].+[.|,]}} align 4 -// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP37]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr @{{pl_cond[.].+[.|,]}} align 4 +// CHECK1-NEXT: store i32 [[TMP42]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// 
CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD1]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP9]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP12]] monotonic, align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP13]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -242,34 +258,35 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr 
[[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP2]] monotonic, align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: store atomic volatile i8 1, ptr [[TMP5]] unordered, align 1 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP5]] monotonic, align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: store atomic volatile i8 1, ptr [[TMP7]] unordered, align 1 // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_for_linear_codegen.cpp b/clang/test/OpenMP/parallel_for_linear_codegen.cpp --- a/clang/test/OpenMP/parallel_for_linear_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_linear_codegen.cpp @@ -93,17 +93,22 @@ // 
CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 // CHECK1-NEXT: store i64 0, ptr [[LVAR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[PVAR]], ptr [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP0]] +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP2]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -117,12 +122,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[LVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8 @@ -132,88 +136,90 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP1]], align 8 -// CHECK1-NEXT: store i64 [[TMP3]], ptr 
[[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTLINEAR_START1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 3 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 3 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL6]] to i64 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP14]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[LVAR3]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[ADD_PTR8:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 3 -// CHECK1-NEXT: store ptr [[ADD_PTR8]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[LVAR3]], align 8 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i64 [[TMP17]], 3 -// CHECK1-NEXT: store i64 [[ADD9]], ptr [[LVAR3]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 3 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTLINEAR_START1]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 3 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL4]] to i64 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw 
i64 [[TMP17]], [[CONV]] +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[LVAR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[ADD_PTR6:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 3 +// CHECK1-NEXT: store ptr [[ADD_PTR6]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[LVAR]], align 8 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP20]], 3 +// CHECK1-NEXT: store i64 [[ADD7]], ptr [[LVAR]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load 
i64, ptr [[LVAR3]], align 8 -// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[LVAR]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP4]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -225,11 +231,16 @@ // CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TEST]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[F]], ptr [[PVAR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[LVAR]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, ptr [[PVAR]], ptr [[LVAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[PVAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[LVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // @@ -266,12 +277,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[PVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[LVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[PVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca ptr, align 8 @@ -281,87 +291,89 @@ // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PVAR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[LVAR3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[PVAR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[LVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[PVAR]], ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[LVAR]], ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[LVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_barrier(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL5]] to i64 -// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[IDX_EXT]] -// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[LVAR3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 1 -// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[LVAR3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[LVAR3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL3]] to i64 +// CHECK1-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[IDX_EXT]] +// CHECK1-NEXT: store ptr [[ADD_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[MUL4]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 1 +// CHECK1-NEXT: store ptr [[INCDEC_PTR]], ptr [[PVAR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[LVAR]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[LVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[PVAR2]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[LVAR3]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PVAR]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[LVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -417,11 +429,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -430,75 +442,77 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: 
store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP6]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], 
align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[G1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP15]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: 
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -519,18 +533,21 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @g) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -539,55 +556,57 @@ // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTLINEAR_START]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]]) -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]]) +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 // CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: // CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP11]], 5 -// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], [[MUL3]] -// CHECK4-NEXT: store i32 [[ADD4]], ptr [[G1]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 5 -// CHECK4-NEXT: store i32 [[ADD5]], ptr [[G1]], align 4 -// CHECK4-NEXT: store i32 1, ptr [[G1]], 
align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP13]], 5 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[MUL2]] +// CHECK4-NEXT: store i32 [[ADD3]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 5 +// CHECK4-NEXT: store i32 [[ADD4]], ptr [[G]], align 4 +// CHECK4-NEXT: store i32 1, ptr [[G]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -599,29 +618,29 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP13:%.*]] = load volatile i32, ptr [[G1]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP13]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP15]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 // CHECK4-NEXT: call void [[TMP17]](ptr noundef [[BLOCK]]) // 
CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP5]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK4-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK4: .omp.linear.pu: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[G1]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: // CHECK4-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -40,283 +40,289 @@ // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = 
alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr 
[[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 
[[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], 
align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN10]] +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 +// 
CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// 
CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP61]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr 
[[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP75]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP79]], ptr [[TMP69]]) +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP67]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[TMP70]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP71]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP68]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP72]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP76]], ptr [[TMP68]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP81:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1 -// CHECK1-NEXT: store 
i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP83]]) -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP85]], i32 1) -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP91]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP80]]) +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP82]], i32 1) +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP88]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP89]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp 
eq ptr [[ARRAYIDX2]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP90]], [[TMP91]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP93]] to 
i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP92]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP88]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP100]] monotonic, align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] 
], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP95]] monotonic, align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP104]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, 
ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0 -// CHECK1-NEXT: [[TMP110:%.*]] = extractvalue { i8, i1 } [[TMP108]], 1 -// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP99:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP104:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP99]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP99]], i8 [[TMP102]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP104]] = extractvalue { i8, i1 } [[TMP103]], 0 +// CHECK1-NEXT: [[TMP105:%.*]] = extractvalue { i8, i1 } [[TMP103]], 1 +// CHECK1-NEXT: br i1 [[TMP105]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP111]]) +// CHECK1-NEXT: [[TMP106:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP106]]) // CHECK1-NEXT: ret void // // @@ -327,8 +333,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -339,12 +345,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], 
align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -439,59 +445,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr 
[[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// 
CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, 
i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -503,38 +509,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // 
CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp --- a/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_aligned_codegen.cpp @@ -25,17 +25,20 @@ // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[C_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -45,56 +48,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 16) ] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -70,35 +70,43 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[TMP2]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[TMP3]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call 
void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z9gtid_testv() // CHECK1-NEXT: ret void // @@ -107,29 +115,32 @@ // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr @Arg, align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_1]], ptr [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_3]], ptr [[DOTBOUND_ZERO_ADDR4]], ptr [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -139,34 +150,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn4v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn5v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn6v() // CHECK1-NEXT: ret void // @@ -175,29 +195,32 @@ // CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..5) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_1]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[DOTTHREADID_TEMP_1]], ptr [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_3]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR4]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[DOTTHREADID_TEMP_3]], ptr [[DOTBOUND_ZERO_ADDR4]], ptr [[OMP_OUTLINED_ARG_AGG_2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -205,34 +228,43 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn1v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn2v() // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: call void @_Z3fn3v() // CHECK1-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -18,37 +18,45 @@ // CHECK-LABEL: define 
{{[^@]+}}@_Z3foov // CHECK-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NEXT: ret void // // // CHECK: Function Attrs: noinline norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // -// CHECK: Function Attrs: alwaysinline norecurse nounwind +// CHECK: Function Attrs: norecurse nounwind // CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NEXT: ret void // // @@ -56,36 +64,44 @@ // CHECK-NOINLINE-LABEL: define 
{{[^@]+}}@_Z3foov // CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOINLINE-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK-NOINLINE-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] // CHECK-NOINLINE-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK-NOINLINE-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK-NOINLINE-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK-NOINLINE-NEXT: ret void // // // CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] // CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // // -// CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind +// CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind // CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NOINLINE-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK-NOINLINE-NEXT: entry: // CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NOINLINE-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-NOINLINE-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NOINLINE-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK-NOINLINE-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -296,35 +296,39 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_masterv // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 
[[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -339,28 +343,32 @@ // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// 
CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[A]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -370,30 +378,35 @@ // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[A]]) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK9-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK9: omp_if.then: -// 
CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: ret void @@ -403,33 +416,38 @@ // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 -// CHECK13-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK13-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK13-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK13-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK13-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK13-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // 
CHECK13: omp_if.then: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 -// CHECK13-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK13-NEXT: store i32 [[INC]], ptr [[A]], align 4 +// CHECK13-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: br label [[OMP_IF_END]] // CHECK13: omp_if.end: // CHECK13-NEXT: ret void @@ -439,16 +457,17 @@ // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK17-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_CASTED1:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ36parallel_master_default_firstprivatevE1y, align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[Y_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZN2St1yE, align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[Y_CASTED1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[Y_CASTED1]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[A]], i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ36parallel_master_default_firstprivatevE1y, align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4:[0-9]+]] // CHECK17-NEXT: ret void // @@ -464,45 +483,51 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], i64 noundef [[Y:%.*]], i64 noundef [[Y1:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[Y_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A4:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[Y:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[Y1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[Y1]], ptr [[Y_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A4]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK17-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[Y1]], align 4 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[A]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK17-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK17: omp_if.then: -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A4]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[A5]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A5]], align 4 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A4]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], ptr [[B]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[Y_ADDR]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[Y_ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr @_ZN2St1yE, align 4 -// 
CHECK17-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[INC7]], ptr @_ZN2St1yE, align 4 -// CHECK17-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[A2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A2]], align 4 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[A]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[Y]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: store i32 [[INC4]], ptr @_ZN2St1yE, align 4 +// CHECK17-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: br label [[OMP_IF_END]] // CHECK17: omp_if.end: -// CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A4]]) #[[ATTR4]] +// CHECK17-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[A]]) #[[ATTR4]] // CHECK17-NEXT: ret void // // @@ -542,33 +567,38 @@ // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK21-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK21-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK21-NEXT: store i32 [[INC]], ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: store i32 [[INC]], ptr [[A]], align 4 +// CHECK21-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: ret void @@ -577,38 +607,42 @@ // CHECK25-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK25-SAME: () #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.) -// CHECK25-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @a, i64 4, ptr @a.cache.) 
+// CHECK25-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 // CHECK25-NEXT: [[TMP5:%.*]] = icmp ne i64 ptrtoint (ptr @a to i64), [[TMP4]] // CHECK25-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK25: copyin.not.master: // CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr @a, align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP6]], ptr [[TMP3]], align 4 // CHECK25-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK25: copyin.not.master.end: -// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK25-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK25-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK25: omp_if.then: -// CHECK25-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP1]], ptr @a, i64 4, ptr @a.cache.) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK25-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB1]], i32 [[TMP2]], ptr @a, i64 4, ptr @a.cache.) 
+// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK25-NEXT: store i32 [[INC]], ptr [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK25-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK25-NEXT: br label [[OMP_IF_END]] // CHECK25: omp_if.end: // CHECK25-NEXT: ret void @@ -617,45 +651,50 @@ // CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK29-SAME: () #[[ATTR0:[0-9]+]] { // CHECK29-NEXT: entry: -// CHECK29-NEXT: [[TMP0:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK29-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK29-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK29-NEXT: ret void // // // CHECK29-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK29-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK29-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK29-NEXT: entry: // CHECK29-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK29-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK29-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK29-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK29-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK29-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK29-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK29-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] -// CHECK29-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK29-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK29-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK29-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK29-NEXT: [[TMP3:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK29-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP3]] to i64 +// CHECK29-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP4]], [[TMP5]] +// CHECK29-NEXT: br i1 [[TMP6]], label [[COPYIN_NOT_MASTER:%.*]], label 
[[COPYIN_NOT_MASTER_END:%.*]] // CHECK29: copyin.not.master: -// CHECK29-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK29-NEXT: store i32 [[TMP5]], ptr [[TMP1]], align 4 +// CHECK29-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK29-NEXT: store i32 [[TMP7]], ptr [[TMP3]], align 4 // CHECK29-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK29: copyin.not.master.end: -// CHECK29-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK29-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK29-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]]) // CHECK29-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK29-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[TMP9]]) -// CHECK29-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK29-NEXT: br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK29-NEXT: call void @__kmpc_barrier(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK29-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK29-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB2]], i32 [[TMP11]]) +// CHECK29-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK29-NEXT: br i1 [[TMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK29: omp_if.then: -// CHECK29-NEXT: [[TMP12:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) -// CHECK29-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK29-NEXT: store i32 [[INC]], ptr [[TMP12]], align 4 -// CHECK29-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP14:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @a) +// CHECK29-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[TMP14]], align 4 +// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK29-NEXT: store i32 [[INC]], ptr [[TMP14]], align 4 +// CHECK29-NEXT: call void @__kmpc_end_master(ptr @[[GLOB2]], i32 [[TMP11]]) // CHECK29-NEXT: br label [[OMP_IF_END]] // CHECK29: omp_if.end: // CHECK29-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -40,238 +40,244 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// 
CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label 
[[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: 
store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr 
[[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 
8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], 
ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// 
CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP56]]) -// CHECK1-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK1-NEXT: br i1 [[TMP58]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: [[TMP56:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP55]]) +// CHECK1-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 +// CHECK1-NEXT: br i1 [[TMP57]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP62]], ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4 -// CHECK1-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP64]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP68]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP69]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP65]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP73]], ptr [[TMP72]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 -// CHECK1-NEXT: [[TMP76:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP75]], ptr [[TMP65]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP56]]) +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP61:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP61]], ptr [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[TMP62]], align 4 +// CHECK1-NEXT: [[TMP64:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP63]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP64]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP65]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP66]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP67]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP64]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP70]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load i32, ptr [[TMP71]], align 4 +// CHECK1-NEXT: [[TMP73:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP72]], ptr [[TMP64]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP55]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP78]], i32 0) -// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP79]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP81]], align 8 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP83:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP83]], 
ptr [[TMP82]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: [[TMP87:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP85]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP87]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP75]], i32 0) +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP82]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP88]], [[TMP89]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP83]], [[TMP84]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP86]] to i32 +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP90]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP85]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP93]] monotonic, align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], 
i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP96]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP88]] monotonic, align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: 
-// CHECK1-NEXT: [[TMP97:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP102:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP97]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP101:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP97]], i8 [[TMP100]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP102]] = extractvalue { i8, i1 } [[TMP101]], 0 -// CHECK1-NEXT: [[TMP103:%.*]] = extractvalue { i8, i1 } [[TMP101]], 1 -// CHECK1-NEXT: br i1 [[TMP103]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP92:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP97:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP92]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP92]], i8 [[TMP95]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP97]] = extractvalue { i8, i1 } 
[[TMP96]], 0 +// CHECK1-NEXT: [[TMP98:%.*]] = extractvalue { i8, i1 } [[TMP96]], 1 +// CHECK1-NEXT: br i1 [[TMP98]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP95]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP104:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP104]]) +// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP99]]) // CHECK1-NEXT: ret void // // @@ -282,8 +288,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -294,12 
+300,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -394,59 +400,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: 
store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: 
[[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 
[[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -458,38 +464,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 
0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 
[[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -57,100 +57,109 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_9:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // 
CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 
[[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP15]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..8) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_9]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], 
i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -231,37 +240,42 @@ // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -289,7 +303,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -342,92 +356,99 @@ // // // 
CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: 
store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP11]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP18]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP23]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[SUB12:%.*]] = sub i32 
[[SUB11]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, ptr @.omp_task_entry..7) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP22]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP23]] to i1 -// CHECK1-NEXT: [[TMP24:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP20]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 -// CHECK1-NEXT: 
[[TMP31:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP19]], i32 [[TMP24]], ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP30]], i32 1, i32 2, i64 [[TMP31]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 80, i64 16, ptr @.omp_task_entry..7) +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP26]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP27]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP29]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP30]] to i1 +// CHECK1-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP32]], align 8 
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP27]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP36]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP26]], i32 [[TMP31]], ptr [[TMP32]], ptr [[TMP33]], i64 [[TMP37]], i32 1, i32 2, i64 [[TMP38]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -462,7 +483,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -499,7 +520,7 @@ // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP18]], align 8 // 
CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP18]], align 8 @@ -539,7 +560,7 @@ // CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: store i64 [[TMP39]], ptr [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -588,35 +609,38 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..10) -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP5]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP5]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr 
align 8 [[TMP9]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP4]], i32 1, ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP10]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..10) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -645,7 +669,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], 
align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -743,7 +767,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -751,77 +775,86 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..13) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr 
[[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..13) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 
[[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -856,7 +889,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -889,7 +922,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -907,7 +940,7 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], 
align 4, !noalias !64 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -217,15 +218,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 
-1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -233,8 +242,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -261,51 +270,50 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, 
ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP23]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -315,22 +323,22 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP17]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, 
i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP13]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP18]], i32 1, ptr [[TMP26]], ptr [[TMP27]], i64 [[TMP30]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
+// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -432,18 +440,18 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]], ptr [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 4 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -570,38 +578,47 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], 
ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..2, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,74 +679,73 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP16]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP20]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP17]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP21]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP16]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP21]], align 16 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP23]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP12]], i32 1, ptr [[TMP20]], ptr [[TMP21]], i64 [[TMP24]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP21]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP25]], 
align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP26]], align 16 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP17]], i32 1, ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP29]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -749,16 +765,16 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 2 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP13]], align 8 // CHECK1-NEXT: ret void @@ -790,11 +806,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -825,16 +841,16 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, 
ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 @@ -868,14 +884,14 @@ // CHECK1-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[TMP33]], align 128 // CHECK1-NEXT: store i32 [[TMP45]], ptr [[TMP23]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP24]], ptr align 4 [[TMP34]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP26]], i64 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S_1:%.*]], ptr [[TMP26]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP46]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: @@ -895,23 +911,23 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br 
i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP7]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // @@ -925,16 +941,16 @@ // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP3]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -959,7 +975,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -972,7 +988,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -998,43 +1014,46 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// 
CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1077,7 +1096,7 @@ // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 @@ -1120,7 +1139,7 @@ // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1140,7 +1159,7 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[TMP25]], align 8 // CHECK3-NEXT: store i32 11, ptr [[TMP26]], align 4 // CHECK3-NEXT: store ptr [[TMP25]], ptr [[REF_TMP_I]], align 8, !noalias !14 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP_I]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8, !noalias !14 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) // CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14 @@ -1194,50 +1213,54 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// 
CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1337,7 +1360,7 @@ // CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1416,64 +1439,71 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[A_ADDR]], ptr [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ADDR]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[TMP7]], 
align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP13]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP10]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 9 -// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP10]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 
0, i64 0, ptr @.omp_task_dup.) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 +// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP21]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP14]], i32 1, ptr [[TMP19]], ptr [[TMP20]], i64 [[TMP23]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1558,9 +1588,9 @@ // CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: call void [[TMP21]](ptr [[TMP22]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1617,49 +1647,54 @@ // CHECK6-SAME: () #[[ATTR0:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[TMP4]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK6-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 88, i64 8, ptr @.omp_task_entry.) 
-// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 8, ptr @.omp_task_entry.) 
+// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP8]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr [[TMP14]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr [[TMP15]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP16]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP15]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP13]], ptr [[TMP14]], i64 [[TMP17]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
+// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -60,14 +60,13 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -76,83 +75,94 @@ // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK1-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// 
CHECK1-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK1-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: 
ret i32 [[TMP17]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 
[[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -233,37 +243,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 
[[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, 
ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -291,7 +306,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -344,98 +359,104 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 
8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[IDXPROM7]] -// 
CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP23]], i64 [[IDXPROM6]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK1-NEXT: store i32 
[[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK1-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK1-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP21]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP22]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK1-NEXT: [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] 
= getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK1-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP21]], i32 [[TMP27]], ptr [[TMP28]], ptr [[TMP29]], i64 [[TMP33]], i32 1, i32 2, i64 [[TMP34]], ptr @.omp_task_dup.) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK1-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK1-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP29]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP30]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP32]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP29]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK1-NEXT: [[TMP35:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK1-NEXT: store i64 [[TMP38]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP40]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP29]], i32 [[TMP35]], ptr [[TMP36]], ptr [[TMP37]], i64 [[TMP41]], i32 1, i32 2, i64 [[TMP42]], ptr @.omp_task_dup.) 
+// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -485,11 +506,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -522,24 +543,24 @@ // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // 
CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 [[IDXPROM_I]] // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK1-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -571,9 +592,9 @@ // CHECK1: taskloop.if.then.i: // 
CHECK1-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -637,11 +658,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: ret void // // @@ -671,7 +692,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -679,77 +700,86 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// 
CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK1-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -784,7 +814,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -817,7 +847,7 @@ // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK1-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -835,7 +865,7 @@ // CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK1-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP18]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: @@ -872,14 +902,13 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -888,83 +917,94 @@ // CHECK2-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK2-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK2-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], 
align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK2-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK2-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK2-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK2-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK2-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK2-NEXT: ret i32 [[TMP14]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK2-NEXT: ret i32 [[TMP17]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 
[[TMP4]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1045,37 +1085,42 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK2-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK2-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1103,7 +1148,7 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1156,98 +1201,104 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef 
nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 
-// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK2-NEXT: 
[[CONV:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP21]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP23]], i64 [[IDXPROM6]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP25]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK2-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK2-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK2-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP21]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP22]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP24]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP21]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK2-NEXT: [[TMP27:%.*]] = sext i1 [[TOBOOL]] to i32 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 6 -// 
CHECK2-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK2-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP22]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP21]], i32 [[TMP27]], ptr [[TMP28]], ptr [[TMP29]], i64 [[TMP33]], i32 1, i32 2, i64 [[TMP34]], ptr @.omp_task_dup.) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[DIV]] to i64 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub i32 [[TMP27]], [[TMP28]] +// CHECK2-NEXT: [[SUB11:%.*]] = sub i32 [[SUB10]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB11]], 1 +// CHECK2-NEXT: [[DIV12:%.*]] = udiv i32 [[ADD]], 1 +// CHECK2-NEXT: [[CONV13:%.*]] = zext i32 [[DIV12]] to i64 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV9]], [[CONV13]] +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK2-NEXT: store i64 [[SUB14]], ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 24, ptr @.omp_task_entry..7) +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP29]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP30]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP32]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP29]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL15:%.*]] = trunc i8 [[TMP34]] to i1 +// CHECK2-NEXT: [[TMP35:%.*]] = sext i1 [[TOBOOL15]] to i32 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8 +// CHECK2-NEXT: store i64 [[TMP38]], ptr [[TMP37]], align 8 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP30]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP40]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP41:%.*]] = load i64, ptr [[TMP39]], align 8 +// CHECK2-NEXT: [[TMP42:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP29]], i32 [[TMP35]], ptr [[TMP36]], ptr [[TMP37]], i64 [[TMP41]], i32 1, i32 2, i64 [[TMP42]], ptr @.omp_task_dup.) 
+// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1297,11 +1348,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -1334,24 +1385,24 @@ // CHECK2-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // 
CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK2-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 [[IDXPROM_I]] // CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK2-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -1383,9 +1434,9 @@ // CHECK2: taskloop.if.then.i: // 
CHECK2-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1449,11 +1500,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK2-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: ret void // // @@ -1463,7 +1514,7 @@ // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1471,77 +1522,86 @@ // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK2-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK2-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK2-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK2-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// 
CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK2-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK2-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK2-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK2-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK2-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK2-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK2-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK2-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK2-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: ret void @@ -1576,7 +1636,7 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr 
[[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1609,7 +1669,7 @@ // CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK2-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !62 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 @@ -1627,7 +1687,7 @@ // CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK2-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !62 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP18]], i32 0, i32 1 // CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: @@ -1684,14 +1744,13 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED6:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED8:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1700,83 +1759,94 @@ // CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP6]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_2]]) // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK3-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL5]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL7]], ptr [[DOTCAPTURE_EXPR__CASTED6]], align 1 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED6]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR__CASTED8]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED8]], 
align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 3 // CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP13]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: [[FROMBOOL7:%.*]] = zext i1 [[TOBOOL6]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL7]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_3]], align 1 +// CHECK3-NEXT: [[TOBOOL8:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_5]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[I]], ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_5]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP14]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK3-NEXT: ret i32 [[TMP17]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP11]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP8]], ptr [[TMP9]], i64 [[TMP12]], i32 1, i32 0, i64 0, ptr null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 
[[TMP4]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 33, i64 80, i64 1, ptr @.omp_task_entry.) +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1857,37 +1927,42 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP6]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP6]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP10]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP4]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP5]], i32 1, ptr [[TMP7]], ptr [[TMP8]], i64 [[TMP11]], i32 1, i32 1, i64 [[TMP12]], ptr null) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 80, i64 1, ptr @.omp_task_entry..4) +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP13]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]], i32 1, ptr [[TMP10]], ptr [[TMP11]], i64 [[TMP14]], i32 1, i32 1, i64 [[TMP15]], ptr null) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1915,7 +1990,7 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -1968,103 +2043,109 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef 
nonnull align 8 dereferenceable(8) [[ARGV:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_1:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_10:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 
-// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] -// CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP17]], i64 [[IDXPROM7]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[ARRAYIDX8]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP19]] to i32 -// CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3: omp_if.then: +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK3-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP23]], i64 [[IDXPROM]] +// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 [[IDXPROM8]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP27]] to i32 +// CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP28]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[CONV10:%.*]] = sext i32 [[DIV]] to i64 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK3-NEXT: [[SUB11:%.*]] = sub i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[SUB12:%.*]] = sub i32 [[SUB11]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB12]], 1 -// CHECK3-NEXT: [[DIV13:%.*]] = udiv i32 [[ADD]], 1 -// CHECK3-NEXT: [[CONV14:%.*]] = zext i32 [[DIV13]] to i64 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV10]], [[CONV14]] -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK3-NEXT: store i64 [[SUB15]], ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 88, i64 32, ptr @.omp_task_entry..7) -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP23]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP24]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr 
[[TMP25]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP26]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP23]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL16:%.*]] = trunc i8 [[TMP28]] to i1 -// CHECK3-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL16]] to i32 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP30]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_9]], align 8 -// CHECK3-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP24]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP34]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP36:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP23]], i32 [[TMP29]], ptr [[TMP30]], ptr [[TMP31]], i64 [[TMP35]], i32 1, i32 2, i64 [[TMP36]], ptr @.omp_task_dup.) 
-// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[CONV11:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_6]], align 4 +// CHECK3-NEXT: [[SUB12:%.*]] = sub i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: [[SUB13:%.*]] = sub i32 [[SUB12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB13]], 1 +// CHECK3-NEXT: [[DIV14:%.*]] = udiv i32 [[ADD]], 1 +// CHECK3-NEXT: [[CONV15:%.*]] = zext i32 [[DIV14]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV11]], [[CONV15]] +// CHECK3-NEXT: [[SUB16:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB16]], ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP12]], i32 1, i64 88, i64 32, ptr @.omp_task_entry..7) +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP31]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP32]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP34]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP31]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL17:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK3-NEXT: [[TMP37:%.*]] = sext i1 [[TOBOOL17]] to i32 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP38]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, 
i32 6 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_10]], align 8 +// CHECK3-NEXT: store i64 [[TMP40]], ptr [[TMP39]], align 8 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP32]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP42]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP12]], ptr [[TMP31]], i32 [[TMP37]], ptr [[TMP38]], ptr [[TMP39]], i64 [[TMP43]], i32 1, i32 2, i64 [[TMP44]], ptr @.omp_task_dup.) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -2114,11 +2195,11 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -2151,24 +2232,24 @@ // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 // CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 // CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr 
[[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 // CHECK3-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP35]] to i64 // CHECK3-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP32]], i64 [[IDXPROM_I]] // CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX_I]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 // CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 // CHECK3-NEXT: [[IDXPROM4_I:%.*]] = sext i32 [[TMP39]] to i64 @@ -2200,11 +2281,11 @@ // CHECK3: taskloop.if.then.i: // CHECK3-NEXT: [[TMP48:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: store i64 [[TMP48]], ptr [[DOTOMP_IV_I]], align 8, !noalias !47 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP50:%.*]] = load ptr, ptr [[TMP49]], align 8 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 2 // CHECK3-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP19]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP19]], i32 0, i32 3 // CHECK3-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP53]], align 1 // CHECK3-NEXT: [[TOBOOL_I:%.*]] = trunc i8 [[TMP54]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL_I]], label [[OMP_IF_THEN_I:%.*]], label [[OMP_IF_ELSE_I:%.*]] @@ -2316,11 +2397,11 @@ // 
CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: ret void // // @@ -2350,7 +2431,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2358,77 +2439,86 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr 
[[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK3-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..8, ptr [[THIS1]], ptr [[C_ADDR]], i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL3]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK3-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr 
[[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 -// CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: 
[[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK3-NEXT: store ptr [[TMP]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = select i1 [[TOBOOL]], i32 2, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = or i32 [[TMP11]], 1 -// CHECK3-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 [[TMP12]], i64 80, i64 16, ptr @.omp_task_entry..10) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK3-NEXT: store i64 [[CONV]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK3-NEXT: 
call void @llvm.memset.p0.i64(ptr align 8 [[TMP21]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP20]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP13]], i32 1, ptr [[TMP17]], ptr [[TMP18]], i64 [[TMP22]], i32 1, i32 2, i64 4, ptr null) -// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = select i1 [[TOBOOL1]], i32 2, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 [[TMP17]], i64 80, i64 16, ptr @.omp_task_entry..10) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP22]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK3-NEXT: store i64 [[CONV]], ptr [[TMP23]], align 8 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP25]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], 
ptr [[TMP19]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP27:%.*]] = load i64, ptr [[TMP25]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP18]], i32 1, ptr [[TMP22]], ptr [[TMP23]], i64 [[TMP27]], i32 1, i32 2, i64 4, ptr null) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -2463,7 +2553,7 @@ // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -2496,7 +2586,7 @@ // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK3-NEXT: store ptr [[TMP_I]], ptr [[TMP1_I]], align 8, !noalias !64 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 // CHECK3-NEXT: store i32 [[TMP22]], ptr 
[[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 @@ -2514,7 +2604,7 @@ // CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP26]] to i32 // CHECK3-NEXT: store i32 [[CONV_I]], ptr [[DOTOMP_IV_I]], align 4, !noalias !64 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP18]], i32 0, i32 1 // CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -207,6 +207,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) @@ -217,15 +218,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIdEC1Ed(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 
-1 // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] @@ -233,8 +242,8 @@ // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIdED1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev @@ -261,51 +270,50 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, 
ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP12]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP16]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 120, i64 40, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP23]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -315,22 +323,22 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP17]], i32 0, i32 1 -// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, 
i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP14]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP13]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: call void @_ZN1SIdEC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP29]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP18]], i32 1, ptr [[TMP26]], ptr [[TMP27]], i64 [[TMP30]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
+// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -432,18 +440,18 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]], ptr [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 4 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 // CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -570,38 +578,47 @@ // CHECK1-SAME: () #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], 
ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..2, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev @@ -662,74 +679,73 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP5]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP16]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP17]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP10]], i32 9, i64 256, i64 32, ptr @.omp_task_entry..5) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 128 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP20]], ptr align 8 [[AGG_CAPTURED]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP17]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP21]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP16]], i32 0, i32 3 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 9, ptr [[TMP21]], align 16 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP23]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[TMP22]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[TMP12]], i32 1, ptr [[TMP20]], ptr [[TMP21]], i64 [[TMP24]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP21]], i32 0, i32 3 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.omp_task_destructor..7, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP25]], 
align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 6 +// CHECK1-NEXT: store i64 9, ptr [[TMP26]], align 16 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP18]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP10]], ptr [[TMP17]], i32 1, ptr [[TMP25]], ptr [[TMP26]], i64 [[TMP29]], i32 1, i32 0, i64 0, ptr @.omp_task_dup..6) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -749,16 +765,16 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP4]], ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 2 // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR3]], align 8 // CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP5]], i32 0, i32 3 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTADDR4]], align 8 // CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP13]], align 8 // CHECK1-NEXT: ret void @@ -790,11 +806,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 128 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 5 // CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 6 @@ -825,16 +841,16 @@ // CHECK1-NEXT: [[TMP20:%.*]] = load ptr, 
ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]], ptr [[DOTLASTPRIV_PTR_ADDR2_I]], ptr [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP19]], i32 0, i32 3 // CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8 // CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 @@ -868,14 +884,14 @@ // CHECK1-NEXT: 
[[TMP45:%.*]] = load i32, ptr [[TMP33]], align 128 // CHECK1-NEXT: store i32 [[TMP45]], ptr [[TMP23]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP24]], ptr align 4 [[TMP34]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S_0:%.*]], ptr [[TMP26]], i64 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_S_1:%.*]], ptr [[TMP26]], i64 2 // CHECK1-NEXT: br label [[OMP_ARRAYCPY_BODY_I:%.*]] // CHECK1: omp.arraycpy.body.i: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST_I:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST_I:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_THEN_I]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT_I:%.*]], [[OMP_ARRAYCPY_BODY_I]] ] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i64 4, i1 false) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST_I]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT_I]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST_I]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DONE_I:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT_I]], [[TMP46]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE_I]], label [[OMP_ARRAYCPY_DONE7_I:%.*]], label [[OMP_ARRAYCPY_BODY_I]] // CHECK1: omp.arraycpy.done7.i: @@ -895,23 +911,23 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTADDR2]], align 4 // CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 64 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP7]], i32 0, i32 2 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP7]], i32 0, i32 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br 
i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP7]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) // CHECK1-NEXT: ret void // @@ -925,16 +941,16 @@ // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP2]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], ptr [[TMP3]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], ptr [[TMP3]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP2]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5:%.*]], ptr [[TMP3]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_5]], ptr [[TMP3]], i32 0, i32 3 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -959,7 +975,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -972,7 +988,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -998,43 +1014,46 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK3-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK3-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK3-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK3-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK3-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK3-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK3-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// 
CHECK3-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK3-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1077,7 +1096,7 @@ // CHECK3-NEXT: [[DOTLASTPRIV_PTR_ADDR1_I:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[I_I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTADDR]], align 4 @@ -1120,7 +1139,7 @@ // CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1140,7 +1159,7 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: store i32 11, ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: store ptr [[TMP25]], ptr [[REF_TMP_I]], align 8, 
!noalias !14, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP_I]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP_I]], i32 0, i32 1 // CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP31]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] @@ -1194,50 +1213,54 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK4-NEXT: store ptr @g, ptr [[TMP4]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP5]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 5 -// CHECK4-NEXT: store i64 0, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 6 -// CHECK4-NEXT: store i64 9, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 7 -// CHECK4-NEXT: store i64 1, ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP7]], i32 0, i32 9 -// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP14]], i8 0, i64 8, i1 false) -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP13]], align 8 -// CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP6]], i32 1, ptr [[TMP11]], ptr [[TMP12]], i64 [[TMP15]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) 
-// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 16, ptr @.omp_task_entry.) +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP10]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP7]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 5 +// CHECK4-NEXT: store i64 0, ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 6 +// CHECK4-NEXT: store i64 9, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 7 +// CHECK4-NEXT: store i64 1, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP8]], i32 0, i32 9 +// CHECK4-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) +// CHECK4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 +// 
CHECK4-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP7]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK4-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: ret void @@ -1337,7 +1360,7 @@ // CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: call void [[TMP20]](ptr [[TMP21]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1416,64 +1439,71 @@ // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], ptr [[A_ADDR]], ptr [[S_ADDR]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ADDR]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK5-NEXT: br i1 [[TMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[TMP7]], 
align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP10:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP11]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP13]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP10]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 5 -// CHECK5-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 6 -// CHECK5-NEXT: store i64 9, ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 7 -// CHECK5-NEXT: store i64 1, ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP11]], i32 0, i32 9 -// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 -// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP10]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 
0, i64 0, ptr @.omp_task_dup.) -// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP14:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 96, i64 24, ptr @.omp_task_entry.) +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP14]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP15]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP17]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP14]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 5 +// CHECK5-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 6 +// CHECK5-NEXT: store i64 9, ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 7 +// CHECK5-NEXT: store i64 1, ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], ptr [[TMP15]], i32 0, i32 9 +// CHECK5-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP22]], i8 0, i64 8, i1 false) +// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP21]], align 8 +// CHECK5-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP8]], ptr [[TMP14]], i32 1, ptr [[TMP19]], ptr [[TMP20]], i64 [[TMP23]], i32 1, i32 0, i64 0, ptr @.omp_task_dup.) +// CHECK5-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: ret void @@ -1558,9 +1588,9 @@ // CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: call void [[TMP21]](ptr [[TMP22]], ptr [[DOTLASTPRIV_PTR_ADDR_I]], ptr [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP19]], i32 0, i32 2 // CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 // CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 @@ -1618,53 +1648,59 @@ // CHECK6-NEXT: entry: // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[I]], ptr [[J]]) +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[I]], ptr [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[J]], ptr [[TMP1]], align 8 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[J:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[J_ADDR:%.*]] = alloca ptr, align 8 -// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK6-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: store ptr [[J]], ptr [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[I_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[J_ADDR]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK6-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_master(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: br i1 [[TMP8]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK6: omp_if.then: -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK6-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i64 80, i64 16, ptr @.omp_task_entry.) 
-// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP8]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 5 -// CHECK6-NEXT: store i64 0, ptr [[TMP12]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 6 -// CHECK6-NEXT: store i64 9, ptr [[TMP13]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 7 -// CHECK6-NEXT: store i64 1, ptr [[TMP14]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP9]], i32 0, i32 9 -// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP15]], i8 0, i64 8, i1 false) -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP14]], align 8 -// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP3]], ptr [[TMP8]], i32 1, ptr [[TMP12]], ptr [[TMP13]], i64 [[TMP16]], i32 1, i32 0, i64 0, ptr null) -// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: [[TMP11:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP6]], i32 
1, i64 80, i64 16, ptr @.omp_task_entry.) +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP11]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK6-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP14]], ptr align 8 [[AGG_CAPTURED]], i64 16, i1 false) +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 5 +// CHECK6-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 6 +// CHECK6-NEXT: store i64 9, ptr [[TMP16]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 7 +// CHECK6-NEXT: store i64 1, ptr [[TMP17]], align 8 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP12]], i32 0, i32 9 +// CHECK6-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP18]], i8 0, i64 8, i1 false) +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK6-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP6]], ptr [[TMP11]], i32 1, ptr [[TMP15]], ptr [[TMP16]], i64 [[TMP19]], i32 1, i32 0, i64 0, ptr null) +// CHECK6-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK6-NEXT: call void @__kmpc_end_master(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: // CHECK6-NEXT: ret void @@ -1729,7 +1765,7 @@ // CHECK6-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 // CHECK6-NEXT: store i32 [[TMP20]], ptr [[DOTLINEAR_START_I]], align 4, !noalias !14 -// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP18]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP21:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP18]], i32 0, i32 1 // CHECK6-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 // CHECK6-NEXT: store i32 [[TMP23]], ptr [[DOTLINEAR_START1_I]], align 4, !noalias !14 @@ -1764,7 +1800,7 @@ // CHECK6-NEXT: br i1 [[TMP34]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK6: .omp.linear.pu.i: // CHECK6-NEXT: [[TMP35:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP18]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP18]], i32 0, i32 1 // CHECK6-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 // CHECK6-NEXT: [[TMP38:%.*]] = load i32, ptr [[J_I]], align 4, !noalias !14 // CHECK6-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 diff --git a/clang/test/OpenMP/parallel_private_codegen.cpp b/clang/test/OpenMP/parallel_private_codegen.cpp --- a/clang/test/OpenMP/parallel_private_codegen.cpp +++ b/clang/test/OpenMP/parallel_private_codegen.cpp @@ -177,6 +177,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE5sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -187,23 +188,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 
2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 
[[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -243,10 +244,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -254,6 +256,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -265,18 +269,18 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], 
ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -299,37 +303,38 @@ // CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], 
align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define 
{{[^@]+}}@_ZN2SSC2ERi @@ -337,6 +342,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -350,16 +356,18 @@ // CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -367,21 +375,23 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// 
CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -453,39 +463,42 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// 
CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -509,7 +522,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], 
i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -518,34 +531,39 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[THIS1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP2]], i64 4, ptr inttoptr (i64 2 to ptr)) -// CHECK1-NEXT: store ptr [[DOTA__VOID_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 
[[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP2]], ptr [[DOTA__VOID_ADDR]], ptr inttoptr (i64 2 to ptr)) +// CHECK1-NEXT: [[DOTA__VOID_ADDR:%.*]] = call ptr @__kmpc_alloc(i32 [[TMP4]], i64 4, ptr inttoptr (i64 2 to ptr)) +// CHECK1-NEXT: store ptr [[DOTA__VOID_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP4]], ptr [[DOTA__VOID_ADDR]], ptr inttoptr (i64 2 to ptr)) // CHECK1-NEXT: ret void // // @@ -557,7 +575,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -602,6 +620,7 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -615,16 +634,18 @@ // CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK3-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[THIS1]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -633,20 +654,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // 
CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -655,6 +678,7 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR2:[0-9]+]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -674,16 +698,18 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -691,40 +717,45 @@ // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 // CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -759,22 +790,26 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 1, ptr [[G]], align 128 // CHECK4-NEXT: store i32 20, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 @@ -788,14 +823,14 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr 
[[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP1]], ptr [[BLOCK_CAPTURED]], align 128 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[BLOCK_CAPTURED1]], align 32 // CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK4-NEXT: call void [[TMP5]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: call void [[TMP4]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret void // // @@ -818,6 +853,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -831,16 +867,18 @@ // CHECK4-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 // CHECK4-NEXT: 
store ptr [[TMP0]], ptr [[C]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[THIS1]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -849,8 +887,10 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 // CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 @@ -864,18 +904,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.4, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[BLOCK_CAPTURED2]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[BLOCK_CAPTURED3]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[BLOCK_CAPTURED3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 // CHECK4-NEXT: call void [[TMP7]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret void // @@ -885,6 +925,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -903,16 +944,18 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[THIS]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -920,20 +963,22 @@ // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// 
CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp @@ -332,18 +332,21 @@ // CHECK1-SAME: (ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -352,52 +355,54 @@ // CHECK1-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to 
i64) -// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: 
omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// 
CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: @@ -415,10 +420,10 @@ // CHECK1-NEXT: 
[[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done7: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK1: omp.arraycpy.body9: @@ -464,30 +469,30 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load 
ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP13]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV3]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void @@ -506,6 +511,9 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN2SSC1ERi(ptr noundef nonnull align 8 dereferenceable(16) [[SS]], ptr noundef nonnull align 4 dereferenceable(4) @sivar) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) @@ -517,32 +525,58 @@ // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..1, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: br label [[IF_END]] // CHECK1: if.end: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[CF]]) -// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z5tmainIiET_v() -// CHECK1-NEXT: store i32 [[CALL1]], ptr [[RETVAL]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[CF]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK1-NEXT: store i32 [[CALL3]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[IF_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done2: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done4: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP14]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -582,151 +616,148 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], 
align 4 -// CHECK1-NEXT: [[REF_TMP12:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP7:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 
@__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL7]], 0.000000e+00 +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label 
[[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = uitofp i1 [[TMP25]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV10]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV5:%.*]] = uitofp i1 [[TMP23]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV5]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP5]], 
align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP30]], [[COND_TRUE]] ], [ [[TMP31]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw fadd ptr [[TMP1]], float [[TMP32]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[CALL11]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL13:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) 
-// CHECK1-NEXT: [[TOBOOL14:%.*]] = fcmp une float [[CALL13]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL14]], label [[LAND_RHS15:%.*]], label [[LAND_END18:%.*]] -// CHECK1: land.rhs15: -// CHECK1-NEXT: [[CALL16:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL17:%.*]] = fcmp une float [[CALL16]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END18]] -// CHECK1: land.end18: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL17]], [[LAND_RHS15]] ] -// CHECK1-NEXT: [[CONV19:%.*]] = uitofp i1 [[TMP36]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]], float noundef [[CONV19]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[REF_TMP12]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP12]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP5]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw fadd ptr [[TMP4]], float [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP8]], ptr align 4 [[CALL6]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr 
@.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL8:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL9:%.*]] = fcmp une float [[CALL8]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL9]], label [[LAND_RHS10:%.*]], label [[LAND_END13:%.*]] +// CHECK1: land.rhs10: +// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END13]] +// CHECK1: land.end13: +// CHECK1-NEXT: [[TMP30:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL12]], [[LAND_RHS10]] ] +// CHECK1-NEXT: [[CONV14:%.*]] = uitofp i1 [[TMP30]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]], float noundef [[CONV14]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[REF_TMP7]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP7]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP12]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END18]] ], [ [[TMP51:%.*]], [[COND_END23:%.*]] ] -// CHECK1-NEXT: [[TMP43:%.*]] = bitcast i32 [[TMP41]] to float -// CHECK1-NEXT: store float [[TMP43]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load float, ptr [[T_VAR15]], 
align 4 -// CHECK1-NEXT: [[CMP20:%.*]] = fcmp olt float [[TMP44]], [[TMP45]] -// CHECK1-NEXT: br i1 [[CMP20]], label [[COND_TRUE21:%.*]], label [[COND_FALSE22:%.*]] -// CHECK1: cond.true21: -// CHECK1-NEXT: [[TMP46:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.false22: -// CHECK1-NEXT: [[TMP47:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END23]] -// CHECK1: cond.end23: -// CHECK1-NEXT: [[COND24:%.*]] = phi float [ [[TMP46]], [[COND_TRUE21]] ], [ [[TMP47]], [[COND_FALSE22]] ] -// CHECK1-NEXT: store float [[COND24]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = cmpxchg ptr [[TMP5]], i32 [[TMP41]], i32 [[TMP48]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP51]] = extractvalue { i32, i1 } [[TMP50]], 0 -// CHECK1-NEXT: [[TMP52:%.*]] = extractvalue { i32, i1 } [[TMP50]], 1 -// CHECK1-NEXT: br i1 [[TMP52]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP32:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END13]] ], [ [[TMP40:%.*]], [[COND_END18:%.*]] ] +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32 [[TMP32]] to float +// CHECK1-NEXT: store float [[TMP33]], ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP15:%.*]] = fcmp olt float [[TMP34]], [[TMP35]] +// CHECK1-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]] +// CHECK1: cond.true16: +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.false17: +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END18]] +// CHECK1: cond.end18: +// CHECK1-NEXT: [[COND19:%.*]] = phi float [ [[TMP36]], [[COND_TRUE16]] ], [ [[TMP37]], [[COND_FALSE17]] ] +// CHECK1-NEXT: store 
float [[COND19]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = cmpxchg ptr [[TMP12]], i32 [[TMP32]], i32 [[TMP38]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP40]] = extractvalue { i32, i1 } [[TMP39]], 0 +// CHECK1-NEXT: [[TMP41:%.*]] = extractvalue { i32, i1 } [[TMP39]], 1 +// CHECK1-NEXT: br i1 [[TMP41]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -739,55 +770,55 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP10]], align 4 -// 
CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP34]] to float +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: 
[[CONV:%.*]] = uitofp i1 [[TMP22]] to float // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP22]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP19]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP17]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP19]], align 4 // CHECK1-NEXT: ret void // // @@ -822,102 +853,101 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 
dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], 
align 8 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[WHILE_COND:%.*]] // CHECK1: while.cond: // CHECK1-NEXT: br label [[WHILE_BODY:%.*]] // CHECK1: while.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP6]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP13]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[WHILE_COND]], !llvm.loop [[LOOP5:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CF:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CF_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CF1:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[CF:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca { float, float }, align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca { float, float }, align 4 +// CHECK1-NEXT: [[ATOMIC_TEMP9:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[CF]], ptr [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[CF_ADDR]], align 8 -// CHECK1-NEXT: [[CF1_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF1_REALP]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF1_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[CF1]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[CF_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF_REALP]], align 4 +// CHECK1-NEXT: store float 0.000000e+00, ptr [[CF_IMAGP]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[CF]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP6]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4 -// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 1 // CHECK1-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4 -// CHECK1-NEXT: [[CF1_REALP2:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL:%.*]] = load float, ptr 
[[CF1_REALP2]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP3:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG:%.*]] = load float, ptr [[CF1_IMAGP3]], align 4 -// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF1_REAL]] -// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF1_IMAG]] -// CHECK1-NEXT: [[DOTREALP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[DOTIMAGP5:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP0]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R]], ptr [[DOTREALP4]], align 4 -// CHECK1-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[CF_REALP1:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL:%.*]] = load float, ptr [[CF_REALP1]], align 4 +// CHECK1-NEXT: [[CF_IMAGP2:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG:%.*]] = load float, ptr [[CF_IMAGP2]], align 4 +// CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[CF_REAL]] +// CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[CF_IMAG]] +// CHECK1-NEXT: [[DOTREALP3:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP2]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R]], ptr [[DOTREALP3]], align 4 +// CHECK1-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[CF1_REALP6:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL7:%.*]] 
= load float, ptr [[CF1_REALP6]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP8:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG9:%.*]] = load float, ptr [[CF1_IMAGP8]], align 4 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) +// CHECK1-NEXT: [[CF_REALP5:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL6:%.*]] = load float, ptr [[CF_REALP5]], align 4 +// CHECK1-NEXT: [[CF_IMAGP7:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG8:%.*]] = load float, ptr [[CF_IMAGP7]], align 4 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[ATOMIC_TEMP_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP]], i32 0, i32 0 @@ -928,21 +958,21 @@ // CHECK1-NEXT: [[TMP_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 // CHECK1-NEXT: store float [[ATOMIC_TEMP_REAL]], ptr [[TMP_REALP]], align 4 // CHECK1-NEXT: store float [[ATOMIC_TEMP_IMAG]], ptr [[TMP_IMAGP]], align 4 -// CHECK1-NEXT: [[TMP_REALP11:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, ptr [[TMP_REALP11]], align 4 -// CHECK1-NEXT: [[TMP_IMAGP12:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, ptr [[TMP_IMAGP12]], align 4 -// CHECK1-NEXT: [[CF1_REALP13:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 0 -// CHECK1-NEXT: [[CF1_REAL14:%.*]] = load float, ptr [[CF1_REALP13]], align 4 -// CHECK1-NEXT: [[CF1_IMAGP15:%.*]] = getelementptr inbounds { float, float }, ptr [[CF1]], i32 0, i32 1 -// CHECK1-NEXT: [[CF1_IMAG16:%.*]] = load float, 
ptr [[CF1_IMAGP15]], align 4 -// CHECK1-NEXT: [[ADD_R17:%.*]] = fadd float [[TMP_REAL]], [[CF1_REAL14]] -// CHECK1-NEXT: [[ADD_I18:%.*]] = fadd float [[TMP_IMAG]], [[CF1_IMAG16]] -// CHECK1-NEXT: [[ATOMIC_TEMP10_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[ATOMIC_TEMP10_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP10]], i32 0, i32 1 -// CHECK1-NEXT: store float [[ADD_R17]], ptr [[ATOMIC_TEMP10_REALP]], align 4 -// CHECK1-NEXT: store float [[ADD_I18]], ptr [[ATOMIC_TEMP10_IMAGP]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP10]], i32 noundef 0, i32 noundef 0) +// CHECK1-NEXT: [[TMP_REALP10:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP_REAL:%.*]] = load float, ptr [[TMP_REALP10]], align 4 +// CHECK1-NEXT: [[TMP_IMAGP11:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP_IMAG:%.*]] = load float, ptr [[TMP_IMAGP11]], align 4 +// CHECK1-NEXT: [[CF_REALP12:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 0 +// CHECK1-NEXT: [[CF_REAL13:%.*]] = load float, ptr [[CF_REALP12]], align 4 +// CHECK1-NEXT: [[CF_IMAGP14:%.*]] = getelementptr inbounds { float, float }, ptr [[CF]], i32 0, i32 1 +// CHECK1-NEXT: [[CF_IMAG15:%.*]] = load float, ptr [[CF_IMAGP14]], align 4 +// CHECK1-NEXT: [[ADD_R16:%.*]] = fadd float [[TMP_REAL]], [[CF_REAL13]] +// CHECK1-NEXT: [[ADD_I17:%.*]] = fadd float [[TMP_IMAG]], [[CF_IMAG15]] +// CHECK1-NEXT: [[ATOMIC_TEMP9_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[ATOMIC_TEMP9_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[ATOMIC_TEMP9]], i32 0, i32 1 +// CHECK1-NEXT: store float [[ADD_R16]], ptr [[ATOMIC_TEMP9_REALP]], align 4 +// 
CHECK1-NEXT: store float [[ADD_I17]], ptr [[ATOMIC_TEMP9_IMAGP]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 8, ptr noundef [[TMP2]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP9]], i32 noundef 0, i32 noundef 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] @@ -958,23 +988,23 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP10]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 0 // CHECK1-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4 -// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP10]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 1 // CHECK1-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4 -// CHECK1-NEXT: [[DOTREALP2:%.*]] = getelementptr inbounds { float, float }, ptr 
[[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTREALP2:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP5]], i32 0, i32 0 // CHECK1-NEXT: [[DOTREAL3:%.*]] = load float, ptr [[DOTREALP2]], align 4 -// CHECK1-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP5]], i32 0, i32 1 // CHECK1-NEXT: [[DOTIMAG5:%.*]] = load float, ptr [[DOTIMAGP4]], align 4 // CHECK1-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[DOTREAL3]] // CHECK1-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[DOTIMAG5]] -// CHECK1-NEXT: [[DOTREALP6:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[DOTIMAGP7:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP10]], i32 0, i32 1 +// CHECK1-NEXT: [[DOTREALP6:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[DOTIMAGP7:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 1 // CHECK1-NEXT: store float [[ADD_R]], ptr [[DOTREALP6]], align 4 // CHECK1-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP7]], align 4 // CHECK1-NEXT: ret void @@ -985,41 +1015,54 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.4], align 4 +// CHECK1-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..8, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_4]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1030,6 +1073,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1048,14 +1092,22 @@ // CHECK1-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..6, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK1-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK1-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK1-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK1-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK1-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK1-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK1-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -1063,87 +1115,86 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = 
alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK1-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK1-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B4]], align 4 
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B4]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[C5]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, 
ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1157,31 +1208,31 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP22]], align 4 +// 
CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP15]], align 4 // CHECK1-NEXT: ret void // // @@ -1258,124 +1309,121 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, 
align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 128 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_4:%.*]], align 128 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_4]], align 128 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4]], align 4 +// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca [[STRUCT_S_4]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 128 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.4], ptr [[TMP6]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 128 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP1]], align 128 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP3]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// 
CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 128 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL9:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP25:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL9]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP25]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP23]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP10]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[TMP5]], align 128 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 128 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP24]], [[TMP25]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP5]], align 128 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP12]], align 128 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR15]], align 128 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR1]], align 128 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE]] ], [ [[TMP31]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP5]], align 128 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP12]], align 128 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR2]], align 128 -// CHECK1-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP32]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 
dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP3]], ptr align 4 [[CALL10]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]]) -// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] -// CHECK1: land.rhs14: -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL16:%.*]] = icmp ne i32 [[CALL15]], 0 -// CHECK1-NEXT: br label [[LAND_END17]] -// CHECK1: land.end17: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] -// CHECK1-NEXT: [[CONV18:%.*]] = zext i1 [[TMP36]] to i32 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], i32 noundef [[CONV18]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP4]], ptr align 4 [[REF_TMP11]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR5]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP18]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR15]], align 128 -// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP39]] monotonic, align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP28]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], 
ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP8]], ptr align 4 [[CALL5]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]]) +// CHECK1-NEXT: [[TOBOOL8:%.*]] = icmp ne i32 [[CALL7]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL8]], label [[LAND_RHS9:%.*]], label [[LAND_END12:%.*]] +// CHECK1: land.rhs9: +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[CALL10]], 0 +// CHECK1-NEXT: br label [[LAND_END12]] +// CHECK1: land.end12: +// CHECK1-NEXT: [[TMP30:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL11]], [[LAND_RHS9]] ] +// CHECK1-NEXT: [[CONV13:%.*]] = zext i1 [[TMP30]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]], i32 noundef [[CONV13]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP10]], ptr align 4 [[REF_TMP6]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP12]], i32 [[TMP31]] monotonic, align 4 // 
CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: ret void // // @@ -1384,59 +1432,59 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_4:%.*]], align 4 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// 
CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 128 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 128 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) 
[[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 128 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 128 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 128 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP34]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP22]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP22]], ptr align 4 
[[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP28]], align 128 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP25]], align 128 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 128 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP17]], align 128 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP28]], align 128 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP19]], align 128 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP25]], align 128 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP17]], align 128 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP28]], align 128 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP19]], align 128 // CHECK1-NEXT: ret void // // @@ -1476,7 +1524,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // 
CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1487,76 +1535,82 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[A]], align 4 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..10, ptr [[THIS1]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store i32 1, ptr [[A1]], align 4 -// CHECK1-NEXT: store ptr [[A1]], ptr 
[[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store i32 1, ptr [[A]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP8]], align 8 
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP11]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP2]] monotonic, align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP5]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP19:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i32 [[TMP14]], ptr 
[[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[_TMP3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[MUL4]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = cmpxchg ptr [[TMP2]], i32 [[TMP14]], i32 [[TMP17]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP19]] = extractvalue { i32, i1 } [[TMP18]], 0 -// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { i32, i1 } [[TMP18]], 1 -// CHECK1-NEXT: br i1 [[TMP20]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP15:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[TMP20:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[_TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = cmpxchg ptr [[TMP5]], i32 [[TMP15]], i32 [[TMP18]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP20]] = extractvalue { i32, i1 } [[TMP19]], 0 +// CHECK1-NEXT: [[TMP21:%.*]] = extractvalue { i32, i1 } [[TMP19]], 1 +// CHECK1-NEXT: br i1 [[TMP21]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -1571,15 +1625,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[MUL]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -1591,7 +1645,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1612,18 +1666,21 @@ // CHECK3-SAME: (ptr noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 
8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -1632,52 +1689,54 @@ // CHECK3-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr 
[[TMP1]], i64 0 -// CHECK3-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK3-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP6]], align 16 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK3-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK3-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK3-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// 
CHECK3-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK3-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK3-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: 
[[TMP19:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK3-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK3-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // 
CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: @@ -1695,10 +1754,10 @@ // CHECK3-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done7: -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK3: omp.arraycpy.body9: @@ -1744,30 +1803,30 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[TMP10]], i64 [[TMP14]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP7]], i64 [[TMP10]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// 
CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP13]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV3]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done4: // CHECK3-NEXT: ret void @@ -1806,6 +1865,7 @@ // CHECK3-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1824,14 +1884,22 @@ // CHECK3-NEXT: [[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[C5]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK3-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK3-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK3-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK3-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK3-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK3-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK3-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -1839,88 +1907,87 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: 
[[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK3-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK3-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[_TMP3]], align 8 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP14]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[C5]], ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP18]], align 
8 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP19]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// 
CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP28]] monotonic, align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP30]] monotonic, align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP32]] monotonic, align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP29]] monotonic, align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP31]] monotonic, align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP33]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1930,32 +1997,41 @@ // CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR0]] align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // 
CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK3-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 2 +// 
CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP1]], ptr [[TMP12]], ptr [[TMP14]], ptr [[TMP16]]) +// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // @@ -1967,116 +2043,115 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], 
ptr [[TMP2]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP15]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], 
ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[A2]], align 4 -// CHECK3-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK3-NEXT: store i32 -1, ptr [[C5]], align 4 -// CHECK3-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK3-NEXT: store i32 [[DEC]], ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK3-NEXT: store ptr [[B4]], ptr [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK3-NEXT: store ptr [[C5]], ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr 
@[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[A]], align 4 +// CHECK3-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK3-NEXT: store i32 -1, ptr [[C]], align 4 +// CHECK3-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK3-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK3-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK3-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[AND7:%.*]] = and i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[AND7]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[C5]], align 4 -// 
CHECK3-NEXT: [[AND8:%.*]] = and i32 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: store i32 [[AND8]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[AND4:%.*]] = and i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[AND4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[AND5:%.*]] = and i32 [[TMP26]], [[TMP27]] +// CHECK3-NEXT: store i32 [[AND5]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = atomicrmw and ptr [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = atomicrmw and ptr [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = atomicrmw and ptr [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = atomicrmw and ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = atomicrmw and ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK3-NEXT: 
[[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2090,71 +2165,73 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// 
CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[AND2:%.*]] = and i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[AND2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[AND3:%.*]] = and i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[AND3]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[AND:%.*]] = and i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[AND]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[AND2:%.*]] = and i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[AND2]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[AND3:%.*]] = and i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[AND3]], ptr [[TMP15]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP1]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[G]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: 
store ptr [[G]], ptr [[TMP3]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP7]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[G1]], align 128 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[G]], align 128 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 128 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP5]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: 
store i32 [[ADD]], ptr [[TMP2]], align 128 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK3-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP10]] monotonic, align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[G]], align 128 +// CHECK3-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP10]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -2168,15 +2245,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 128 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 128 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 128 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 128 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 128 +// 
CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 128 // CHECK3-NEXT: ret void // // @@ -2184,18 +2261,21 @@ // CHECK4-SAME: (ptr noundef [[X:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[X:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 @@ -2204,52 +2284,54 @@ // CHECK4-NEXT: [[_TMP13:%.*]] = alloca i16, align 2 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 0 -// CHECK4-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 -// CHECK4-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP8:%.*]] = call ptr @llvm.stacksave() -// CHECK4-NEXT: store ptr [[TMP8]], ptr [[SAVED_STACK]], align 8 -// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 
[[TMP6]], align 16 -// CHECK4-NEXT: store i64 [[TMP6]], ptr [[__VLA_EXPR0]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP6]] -// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP9]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 0 +// CHECK4-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK4-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK4-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] +// CHECK4-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP8:%.*]] = add nuw i64 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP10:%.*]] = call ptr @llvm.stacksave() +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[SAVED_STACK]], align 8 +// CHECK4-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP8]], align 16 +// CHECK4-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP8]] +// CHECK4-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK4: omp.arrayinit.body: // CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK4-NEXT: 
store i16 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK4-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK4: omp.arrayinit.done: -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK4-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP14]] -// CHECK4-NEXT: store ptr [[TMP15]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP6]] to ptr -// CHECK4-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP21]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[TMP12]] to i64 +// CHECK4-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// 
CHECK4-NEXT: [[TMP15:%.*]] = sub i64 [[TMP13]], [[TMP14]] +// CHECK4-NEXT: [[TMP16:%.*]] = sdiv exact i64 [[TMP15]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr i16, ptr [[VLA]], i64 [[TMP16]] +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[VLA]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP8]] to ptr +// CHECK4-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP22]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP24]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK4: omp.arraycpy.body: @@ -2267,10 +2349,10 @@ // CHECK4-NEXT: [[OMP_ARRAYCPY_DONE6:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT5]], [[TMP24]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] // CHECK4: 
omp.arraycpy.done7: -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP22]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP6]] +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr i16, ptr [[ARRAYIDX]], i64 [[TMP8]] // CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY8:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP27]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY8]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY9:%.*]] // CHECK4: omp.arraycpy.body9: @@ -2316,30 +2398,30 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr i16, ptr [[TMP10]], i64 [[TMP14]] -// CHECK4-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP7]], i64 [[TMP10]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK4: omp.arraycpy.body: -// CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 -// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK4-NEXT: [[TMP17:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 -// CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK4-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK4-NEXT: [[TMP12:%.*]] = load i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 +// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK4-NEXT: [[TMP13:%.*]] = load i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 2 +// CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP13]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK4-NEXT: store i16 [[CONV3]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2 // CHECK4-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK4-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i16, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK4-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK4-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK4: omp.arraycpy.done4: // CHECK4-NEXT: ret void @@ -2375,27 +2457,32 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr @g) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @g, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[G:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, align 128 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[G]], align 128 +// CHECK4-NEXT: store i32 1, ptr [[G]], align 128 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 
0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2407,30 +2494,30 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.2, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, [96 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP1]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[G]], align 128 +// CHECK4-NEXT: store volatile i32 [[TMP3]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK4-NEXT: call void [[TMP5]](ptr noundef [[BLOCK]]) -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G1]], ptr [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.3, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP12]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.3, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 128 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 128 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[G1]], align 128 -// CHECK4-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP15]] monotonic, align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[G]], align 128 +// CHECK4-NEXT: [[TMP13:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP12]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: 
.omp.reduction.default: // CHECK4-NEXT: ret void @@ -2456,15 +2543,15 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 128 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 128 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 128 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 128 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 128 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 128 // CHECK4-NEXT: ret void // // @@ -2476,6 +2563,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C5:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2494,14 +2582,22 @@ // CHECK4-NEXT: 
[[C6:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C6]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C5]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[THIS1]], ptr [[TMP2]], ptr [[B4]], ptr [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C5]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 // CHECK4-NEXT: [[B7:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 +// CHECK4-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 // CHECK4-NEXT: [[BF_LOAD8:%.*]] = load i8, ptr [[B7]], align 4 -// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP5]], 15 +// CHECK4-NEXT: [[BF_VALUE:%.*]] = and i8 [[TMP9]], 15 // CHECK4-NEXT: [[BF_CLEAR9:%.*]] = and i8 [[BF_LOAD8]], -16 // CHECK4-NEXT: [[BF_SET10:%.*]] = or i8 [[BF_CLEAR9]], [[BF_VALUE]] // CHECK4-NEXT: store i8 [[BF_SET10]], ptr [[B7]], align 4 @@ -2509,42 +2605,41 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: 
[[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK4-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK4-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -2556,54 +2651,54 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.7, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS_ADDR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[BLOCK_CAPTURED_THIS_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, 
i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP6]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[BLOCK_CAPTURED7]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: store ptr [[TMP8]], ptr [[BLOCK_CAPTURED8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[A2]], ptr [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP11]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[BLOCK_CAPTURED4]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 7 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP13]], ptr 
[[BLOCK_CAPTURED5]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK4-NEXT: store ptr [[C5]], ptr [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4 -// 
CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD9]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK4-NEXT: store i32 [[ADD10]], ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP21]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: store i32 [[ADD6]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK4-NEXT: store i32 [[ADD7]], ptr [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP30]] monotonic, align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[B4]], align 4 
-// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP32]] monotonic, align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP34]] monotonic, align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void @@ -2614,6 +2709,7 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED_THIS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 5 @@ -2632,97 +2728,104 @@ // CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP4]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR3:%.*]] = getelementptr inbounds <{ ptr, 
i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR3]], align 8 +// CHECK4-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR4:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 8 +// CHECK4-NEXT: store ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 // CHECK4-NEXT: [[BLOCK_CAPTURE_ADDR5:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, ptr, ptr, ptr, i32 }>, ptr [[DOTBLOCK_DESCRIPTOR]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..5, ptr [[THIS]], ptr [[TMP5]], ptr [[BLOCK_CAPTURE_ADDR4]], ptr [[TMP6]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[BLOCK_CAPTURE_ADDR5]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[C5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[A2]], align 4 -// CHECK4-NEXT: store ptr [[A2]], ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[B4]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store i32 0, ptr [[C5]], align 4 -// CHECK4-NEXT: store ptr [[C5]], ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP8]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B4]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[A2]], ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK4-NEXT: store ptr [[C5]], ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP8]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store i32 0, ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP13]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP15]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK4-NEXT: store ptr [[C]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], i32 3, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 -// 
CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD7]], ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK4-NEXT: store i32 [[ADD8]], ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP18]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK4-NEXT: store i32 [[ADD5]], ptr [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP20]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[A2]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP27]] monotonic, align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP29]] monotonic, align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[C5]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = atomicrmw add ptr [[TMP5]], i32 [[TMP31]] monotonic, align 4 +// 
CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = atomicrmw add ptr [[TMP9]], i32 [[TMP28]] monotonic, align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP30]] monotonic, align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[C]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = atomicrmw add ptr [[TMP10]], i32 [[TMP32]] monotonic, align 4 // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: // CHECK4-NEXT: ret void @@ -2736,31 +2839,31 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 // 
CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK4-NEXT: store i32 [[ADD3]], ptr [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK4-NEXT: store i32 [[ADD3]], ptr [[TMP15]], align 4 // CHECK4-NEXT: ret void // // @@ -2772,30 +2875,30 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 
0, i64 2 -// CHECK4-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK4-NEXT: store i32 [[ADD3]], ptr [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK4-NEXT: store i32 [[ADD3]], ptr [[TMP15]], align 4 // CHECK4-NEXT: ret void // diff --git 
a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -40,229 +40,235 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// 
CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label 
[[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: 
store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr 
[[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 
8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], 
ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// 
CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP63]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP65]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP61]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr 
[[DOTTASK_RED_]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP59]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP60]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP61]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP63]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP60]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP64]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP68]], ptr [[TMP60]]) // CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], 
i32 [[TMP71]], ptr [[TMP61]]) -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP74]], i32 0) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP71]], i32 0) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP75]], ptr 
[[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP78]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP79]], [[TMP80]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP81]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], 
[[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP82]] to i32 +// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP83]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP81]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP89]] monotonic, align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP84]] monotonic, align 4 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: 
omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP93]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0 -// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1 -// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP88:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP93:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP88]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[_TMP23]], 
align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP89]] to i32 +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP88]], i8 [[TMP91]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP93]] = extractvalue { i8, i1 } [[TMP92]], 0 +// CHECK1-NEXT: [[TMP94:%.*]] = extractvalue { i8, i1 } [[TMP92]], 1 +// CHECK1-NEXT: br i1 [[TMP94]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void 
@llvm.stackrestore(ptr [[TMP100]]) +// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP95]]) // CHECK1-NEXT: ret void // // @@ -273,8 +279,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -285,12 +291,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -385,59 +391,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to 
i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr 
[[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -449,38 +455,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] 
to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_sections_codegen.cpp b/clang/test/OpenMP/parallel_sections_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_codegen.cpp @@ -57,17 +57,19 @@ // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: ret i32 [[CALL]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -75,28 +77,30 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], 
i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label 
[[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] @@ -113,18 +117,18 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP11]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -138,15 +142,17 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -154,28 +160,30 @@ // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label 
[[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: @@ -186,17 +194,17 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP10:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP11:%.*]] = extractvalue { ptr, i32 } [[TMP10]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP11]]) #[[ATTR7]] +// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR7]] // CHECK1-NEXT: unreachable // diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -40,271 +40,277 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] 
to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// 
CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// 
CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// 
CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP58:%.*]] = icmp slt i32 [[TMP57]], 0 -// CHECK1-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[TMP57]], i32 0 -// CHECK1-NEXT: store i32 [[TMP59]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, 
i32 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP57:%.*]] = icmp slt i32 [[TMP56]], 0 +// CHECK1-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 0 +// CHECK1-NEXT: store i32 [[TMP58]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP59]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP61]], [[TMP62]] +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP60]], [[TMP61]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP62]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// 
CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 -// CHECK1-NEXT: [[TMP70:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP70]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP72]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[TMP73]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP74]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP70]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP76]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP78]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP80]], ptr [[TMP70]]) +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store 
ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[TMP71]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP72]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP73]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP77]], ptr [[TMP69]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP82]], 1 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP79]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP84]]) -// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP86]], i32 1) -// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP87]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP91]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[TMP92]], align 4 -// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP93]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP81]]) +// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP83]], i32 1) +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP87:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP87]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP88]], align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP89]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP90]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP91]], [[TMP92]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = 
getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP98]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP93]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP93]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP89]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP101]] monotonic, align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP104]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic 
i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP96]] monotonic, align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 -// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 -// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP100:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP105:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP100]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP100]], i8 [[TMP103]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP105]] = extractvalue { i8, i1 } [[TMP104]], 0 +// CHECK1-NEXT: [[TMP106:%.*]] = extractvalue { i8, i1 } [[TMP104]], 1 +// CHECK1-NEXT: br i1 [[TMP106]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP103]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) +// CHECK1-NEXT: [[TMP107:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP107]]) // CHECK1-NEXT: ret void // // @@ -315,8 +321,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -327,12 +333,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -427,59 +433,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 
[[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr 
@__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] 
= load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -491,38 +497,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 
1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/reduction_compound_op.cpp b/clang/test/OpenMP/reduction_compound_op.cpp --- a/clang/test/OpenMP/reduction_compound_op.cpp +++ b/clang/test/OpenMP/reduction_compound_op.cpp @@ -18,7 +18,6 @@ //RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-unknown-linux-gnu -fopenmp-simd -DNORM -DCOMP \ //RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix 
SIMD-ONLY -// SIMD-ONLY-NOT: {{__kmpc|__tgt}} struct Point { int x = 0; @@ -85,17 +84,73 @@ // NORM-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// NORM-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // NORM-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 // NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..13, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) +// NORM-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP2]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP5]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP7]], align 8 +// NORM-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP8]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP9]], align 8 +// NORM-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP10]], align 8 +// NORM-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP11]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_3]]) +// NORM-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// NORM-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP13]], align 8 +// NORM-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP14]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// NORM-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP15]], align 8 +// NORM-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP16]], align 8 +// NORM-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP17]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_5]]) +// NORM-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP18]], align 8 +// NORM-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP19]], align 8 +// NORM-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP20]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// NORM-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// NORM-NEXT: store ptr [[N_ADDR]], ptr [[TMP21]], align 8 +// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP22]], align 8 +// NORM-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// NORM-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP23]], align 8 +// NORM-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_7]]) // NORM-NEXT: ret void // // @@ -110,13 +165,11 @@ // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined. 
-// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -126,107 +179,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label 
[[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 
[[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// 
NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -243,25 +298,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..1 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca 
ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -271,107 +324,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr 
[[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr 
@.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr 
nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -388,25 +443,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// 
NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointplERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..3 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -416,107 +469,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca 
[[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] 
+// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// 
NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] 
= load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 
[[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store 
i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -533,25 +588,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointmlERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 
dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..5 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -561,107 +614,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 
8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // 
NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 
[[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -678,25 +733,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr 
[[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointanERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..7 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // 
NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -706,107 +759,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: 
[[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr 
nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], 
[[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void 
@_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: 
[[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr 
[[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -823,25 +878,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointorERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..9 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -851,107 +904,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca 
i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 
+// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br 
i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// 
NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: 
br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: 
[[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// 
NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -968,25 +1023,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointeoERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr 
@_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..11 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -996,107 +1049,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca 
[[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: -// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
// NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr 
@_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: 
.omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1113,25 +1168,23 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // // NORM-LABEL: define {{[^@]+}}@.omp_outlined..13 -// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull 
align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// NORM-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // NORM-NEXT: entry: // NORM-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// NORM-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// NORM-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // NORM-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // NORM-NEXT: [[TMP:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1141,107 +1194,109 @@ // NORM-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// NORM-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// NORM-NEXT: [[I4:%.*]] = alloca i32, align 4 +// NORM-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// NORM-NEXT: [[I3:%.*]] = alloca i32, align 4 // NORM-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // NORM-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// NORM-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// NORM-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // NORM-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// NORM-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// NORM-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// NORM-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// NORM-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// NORM-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[TMP0]], align 4 -// NORM-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// NORM-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// NORM-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// NORM-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// NORM-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// NORM-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// NORM-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// NORM-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // NORM-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // NORM-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // NORM-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: store i32 0, ptr [[I]], align 4 -// NORM-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// NORM-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // NORM-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // NORM: omp.precond.then: // NORM-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: store i32 [[TMP10]], ptr 
[[DOTOMP_UB]], align 4 // NORM-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // NORM-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// NORM-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// NORM-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// NORM-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// NORM-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// NORM-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// NORM-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// NORM-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // NORM: cond.true: -// NORM-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // NORM-NEXT: br label [[COND_END:%.*]] // NORM: cond.false: -// NORM-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // NORM-NEXT: br label [[COND_END]] // NORM: cond.end: 
-// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// NORM-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // NORM-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// NORM-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// NORM-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // NORM: omp.inner.for.cond: -// NORM-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// NORM-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// NORM-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// NORM-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// NORM-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// NORM-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// NORM-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // NORM: omp.inner.for.body: -// NORM-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// NORM-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// NORM-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// NORM-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// NORM-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// NORM-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// NORM-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// NORM-NEXT: store i32 [[ADD6]], ptr 
[[I3]], align 4 +// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// NORM-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// NORM-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // NORM-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // NORM: omp.body.continue: // NORM-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // NORM: omp.inner.for.inc: -// NORM-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// NORM-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// NORM-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// NORM-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// NORM-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // NORM-NEXT: br label [[OMP_INNER_FOR_COND]] // NORM: omp.inner.for.end: // NORM-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // NORM: omp.loop.exit: -// NORM-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// NORM-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// NORM-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // NORM-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // NORM-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// NORM-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// NORM-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// NORM-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// NORM-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// NORM-NEXT: store ptr [[RED]], ptr [[TMP26]], 
align 8 +// NORM-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// NORM-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // NORM-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // NORM-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // NORM-NEXT: ] // NORM: .omp.reduction.case1: -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// NORM-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.case2: -// NORM-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// NORM-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// NORM-NEXT: [[CALL11:%.*]] = call 
i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// NORM-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// NORM-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// NORM-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// NORM-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// NORM-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// NORM-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// NORM-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// NORM-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // NORM-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // NORM: .omp.reduction.default: // NORM-NEXT: br label [[OMP_PRECOND_END]] @@ -1258,14 +1313,14 @@ // NORM-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // NORM-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // NORM-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// NORM-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// NORM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// NORM-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// NORM-NEXT: [[TMP5:%.*]] = load ptr, 
ptr [[TMP4]], align 8 +// NORM-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // NORM-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// NORM-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// NORM-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// NORM-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // NORM-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// NORM-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // NORM-NEXT: ret void // // @@ -1288,17 +1343,73 @@ // COMP-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_2:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_3:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_4:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_5:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_6:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 +// COMP-NEXT: [[OMP_OUTLINED_ARG_AGG_7:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 
// COMP-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 // COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4:[0-9]+]] -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..5, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) -// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..13, ptr [[N_ADDR]], ptr [[RED]], ptr [[POINTS_ADDR]]) +// COMP-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP0]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP2]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_1]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP5]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP7]], align 8 +// COMP-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_2]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP8]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_2]]) +// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP9]], align 8 +// COMP-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP10]], align 8 +// COMP-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_3]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP11]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_3]]) +// COMP-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// COMP-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP13]], align 8 +// COMP-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_4]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP14]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_4]]) +// COMP-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP15]], align 8 +// COMP-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP16]], align 8 +// COMP-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_5]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP17]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_5]]) +// COMP-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP18]], align 8 +// COMP-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP19]], align 8 +// COMP-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_6]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP20]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_6]]) +// COMP-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 0 +// COMP-NEXT: store ptr [[N_ADDR]], ptr [[TMP21]], align 8 +// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 1 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP22]], align 8 +// COMP-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_7]], i32 0, i32 2 +// COMP-NEXT: store ptr [[POINTS_ADDR]], ptr [[TMP23]], align 8 +// COMP-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_7]]) // COMP-NEXT: ret void // // @@ -1313,13 +1424,11 @@ // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined. 
-// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2:[0-9]+]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1329,101 +1438,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 
[[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label 
[[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// 
COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1439,23 +1550,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..1 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1465,101 +1574,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br 
label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: 
store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1575,23 +1686,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointpLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..3 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1601,101 +1710,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br 
label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: 
store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.4, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1711,23 +1822,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointmLERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..5 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1737,101 +1846,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br 
label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: 
store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1847,23 +1958,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaNERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..7 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1873,101 +1982,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br 
label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: 
store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.8, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -1983,23 +2094,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointoRERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..9 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2009,101 +2118,103 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br 
label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// 
COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: 
store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP29]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2119,23 +2230,21 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointeOERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: ret void // // // COMP-LABEL: define 
{{[^@]+}}@.omp_outlined..11 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2145,107 +2254,109 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// 
COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: 
br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 
[[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// 
COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.12, ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP29:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2262,25 +2373,23 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointaaERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // COMP-NEXT: ret void // // // COMP-LABEL: define {{[^@]+}}@.omp_outlined..13 -// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr nonnull align 4 dereferenceable(8) [[RED:%.*]], ptr nonnull align 8 dereferenceable(8) [[POINTS:%.*]]) #[[ATTR2]] { +// COMP-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // COMP-NEXT: entry: // COMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[N_ADDR:%.*]] = 
alloca ptr, align 8 -// COMP-NEXT: [[RED_ADDR:%.*]] = alloca ptr, align 8 -// COMP-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// COMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // COMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // COMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2290,107 +2399,109 @@ // COMP-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// COMP-NEXT: [[RED3:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 -// COMP-NEXT: [[I4:%.*]] = alloca i32, align 4 +// COMP-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// COMP-NEXT: [[I3:%.*]] = alloca i32, align 4 // COMP-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // COMP-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_POINT]], align 4 -// COMP-NEXT: [[REF_TMP10:%.*]] = alloca [[STRUCT_POINT]], align 4 +// COMP-NEXT: [[REF_TMP9:%.*]] = alloca [[STRUCT_POINT]], align 4 // COMP-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// COMP-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// COMP-NEXT: store ptr [[RED]], ptr [[RED_ADDR]], align 8 -// COMP-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// COMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RED_ADDR]], align 8 -// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 -// COMP-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// COMP-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP4]], 0 +// COMP-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// COMP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
COMP-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// COMP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// COMP-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// COMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// COMP-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// COMP-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[SUB:%.*]] = sub i32 [[TMP8]], 0 // COMP-NEXT: [[DIV:%.*]] = udiv i32 [[SUB]], 1 // COMP-NEXT: [[SUB2:%.*]] = sub i32 [[DIV]], 1 // COMP-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: store i32 0, ptr [[I]], align 4 -// COMP-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP5]] +// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// COMP-NEXT: [[CMP:%.*]] = icmp ult i32 0, [[TMP9]] // COMP-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // COMP: omp.precond.then: // COMP-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // COMP-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED3]]) #[[ATTR4]] -// COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 -// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// COMP-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// COMP-NEXT: [[CMP5:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// COMP-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// COMP-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR4]] +// COMP-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// COMP-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]] +// COMP-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // COMP: cond.true: -// COMP-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // COMP-NEXT: br label [[COND_END:%.*]] // COMP: cond.false: -// COMP-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // COMP-NEXT: br label [[COND_END]] // COMP: cond.end: -// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// COMP-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // COMP-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// COMP-NEXT: store i32 [[TMP13]], ptr 
[[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// COMP-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // COMP: omp.inner.for.cond: -// COMP-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP15]], 1 -// COMP-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP14]], [[ADD]] -// COMP-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// COMP-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// COMP-NEXT: [[ADD:%.*]] = add i32 [[TMP19]], 1 +// COMP-NEXT: [[CMP5:%.*]] = icmp ult i32 [[TMP18]], [[ADD]] +// COMP-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // COMP: omp.inner.for.body: -// COMP-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], 1 -// COMP-NEXT: [[ADD7:%.*]] = add i32 0, [[MUL]] -// COMP-NEXT: store i32 [[ADD7]], ptr [[I4]], align 4 -// COMP-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// COMP-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP2]], align 8 -// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED3]], i32 [[TMP17]], ptr [[TMP18]]) +// COMP-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[MUL:%.*]] = mul i32 [[TMP20]], 1 +// COMP-NEXT: [[ADD6:%.*]] = add i32 0, [[MUL]] +// COMP-NEXT: store i32 [[ADD6]], ptr [[I3]], align 4 +// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// COMP-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP6]], align 8 +// COMP-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP21]], ptr [[TMP22]]) // COMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // COMP: omp.body.continue: // COMP-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // COMP: omp.inner.for.inc: -// COMP-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// COMP-NEXT: [[ADD8:%.*]] = add i32 [[TMP19]], 1 -// COMP-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// COMP-NEXT: [[ADD7:%.*]] = add i32 [[TMP23]], 1 +// COMP-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // COMP-NEXT: br label [[OMP_INNER_FOR_COND]] // COMP: omp.inner.for.end: // COMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // COMP: omp.loop.exit: -// COMP-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// COMP-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// COMP-NEXT: store ptr [[RED3]], ptr [[TMP22]], align 8 // COMP-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // COMP-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// COMP-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) -// COMP-NEXT: switch i32 [[TMP27]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// COMP-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// COMP-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// COMP-NEXT: store ptr [[RED]], ptr [[TMP26]], align 8 +// COMP-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// COMP-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr 
@.gomp_critical_user_.reduction.var) +// COMP-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // COMP-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // COMP-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // COMP-NEXT: ] // COMP: .omp.reduction.case1: -// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL9:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) -// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP25]], ptr @.gomp_critical_user_.reduction.var) +// COMP-NEXT: [[CALL8:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP28]], ptr @.gomp_critical_user_.reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.case2: -// COMP-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// COMP-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) -// COMP-NEXT: [[CALL11:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr nonnull align 4 dereferenceable(8) [[RED3]]) -// COMP-NEXT: store i64 [[CALL11]], ptr [[REF_TMP10]], align 4 -// COMP-NEXT: [[CALL12:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP1]], ptr 
nonnull align 4 dereferenceable(8) [[REF_TMP10]]) -// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP30]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// COMP-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// COMP-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) +// COMP-NEXT: [[CALL10:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[RED]]) +// COMP-NEXT: store i64 [[CALL10]], ptr [[REF_TMP9]], align 4 +// COMP-NEXT: [[CALL11:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP4]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP9]]) +// COMP-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP31]], ptr @.gomp_critical_user_.atomic_reduction.var) // COMP-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // COMP: .omp.reduction.default: // COMP-NEXT: br label [[OMP_PRECOND_END]] @@ -2407,14 +2518,14 @@ // COMP-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // COMP-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // COMP-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// COMP-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// COMP-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// COMP-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// COMP-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// COMP-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // COMP-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// COMP-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// COMP-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// 
COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[TMP7]]) +// COMP-NEXT: [[CALL:%.*]] = call i64 @_ZNK5PointooERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[TMP5]]) // COMP-NEXT: store i64 [[CALL]], ptr [[REF_TMP]], align 4 -// COMP-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP10]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) +// COMP-NEXT: [[CALL2:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZN5PointaSERKS_(ptr nonnull align 4 dereferenceable(8) [[TMP7]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // COMP-NEXT: ret void // // @@ -2430,3 +2541,190 @@ // COMP-NEXT: store i32 0, ptr [[Y]], align 4 // COMP-NEXT: ret void // +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_Z3fooiPK5Point +// SIMD-ONLY-SAME: (i32 [[N:%.*]], ptr [[POINTS:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[POINTS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY-NEXT: [[RED:%.*]] = alloca [[STRUCT_POINT:%.*]], align 4 +// SIMD-ONLY-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I1:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I8:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I15:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I22:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I29:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I36:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: [[I43:%.*]] = alloca i32, align 4 +// SIMD-ONLY-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: store ptr [[POINTS]], ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_ZN5PointC1Ev(ptr nonnull align 4 dereferenceable(8) [[RED]]) #[[ATTR3:[0-9]+]] +// SIMD-ONLY-NEXT: store i32 0, ptr [[I]], align 4 +// SIMD-ONLY-NEXT: br label 
[[FOR_COND:%.*]] +// SIMD-ONLY: for.cond: +// SIMD-ONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]] +// SIMD-ONLY-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY: for.body: +// SIMD-ONLY-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY-NEXT: [[TMP3:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP2]], ptr [[TMP3]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY: for.inc: +// SIMD-ONLY-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4 +// SIMD-ONLY-NEXT: [[INC:%.*]] = add i32 [[TMP4]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC]], ptr [[I]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY: for.end: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I1]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND2:%.*]] +// SIMD-ONLY: for.cond2: +// SIMD-ONLY-NEXT: [[TMP5:%.*]] = load i32, ptr [[I1]], align 4 +// SIMD-ONLY-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP3:%.*]] = icmp ult i32 [[TMP5]], [[TMP6]] +// SIMD-ONLY-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END7:%.*]] +// SIMD-ONLY: for.body4: +// SIMD-ONLY-NEXT: [[TMP7:%.*]] = load i32, ptr [[I1]], align 4 +// SIMD-ONLY-NEXT: [[TMP8:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP7]], ptr [[TMP8]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC5:%.*]] +// SIMD-ONLY: for.inc5: +// SIMD-ONLY-NEXT: [[TMP9:%.*]] = load i32, ptr [[I1]], align 4 +// SIMD-ONLY-NEXT: [[INC6:%.*]] = add i32 [[TMP9]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC6]], ptr [[I1]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY: 
for.end7: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I8]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND9:%.*]] +// SIMD-ONLY: for.cond9: +// SIMD-ONLY-NEXT: [[TMP10:%.*]] = load i32, ptr [[I8]], align 4 +// SIMD-ONLY-NEXT: [[TMP11:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP10]], [[TMP11]] +// SIMD-ONLY-NEXT: br i1 [[CMP10]], label [[FOR_BODY11:%.*]], label [[FOR_END14:%.*]] +// SIMD-ONLY: for.body11: +// SIMD-ONLY-NEXT: [[TMP12:%.*]] = load i32, ptr [[I8]], align 4 +// SIMD-ONLY-NEXT: [[TMP13:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP12]], ptr [[TMP13]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC12:%.*]] +// SIMD-ONLY: for.inc12: +// SIMD-ONLY-NEXT: [[TMP14:%.*]] = load i32, ptr [[I8]], align 4 +// SIMD-ONLY-NEXT: [[INC13:%.*]] = add i32 [[TMP14]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC13]], ptr [[I8]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND9]], !llvm.loop [[LOOP5:![0-9]+]] +// SIMD-ONLY: for.end14: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I15]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND16:%.*]] +// SIMD-ONLY: for.cond16: +// SIMD-ONLY-NEXT: [[TMP15:%.*]] = load i32, ptr [[I15]], align 4 +// SIMD-ONLY-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP17:%.*]] = icmp ult i32 [[TMP15]], [[TMP16]] +// SIMD-ONLY-NEXT: br i1 [[CMP17]], label [[FOR_BODY18:%.*]], label [[FOR_END21:%.*]] +// SIMD-ONLY: for.body18: +// SIMD-ONLY-NEXT: [[TMP17:%.*]] = load i32, ptr [[I15]], align 4 +// SIMD-ONLY-NEXT: [[TMP18:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP17]], ptr [[TMP18]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC19:%.*]] +// SIMD-ONLY: for.inc19: +// SIMD-ONLY-NEXT: [[TMP19:%.*]] = load i32, ptr [[I15]], align 4 +// SIMD-ONLY-NEXT: [[INC20:%.*]] = add i32 [[TMP19]], 1 
+// SIMD-ONLY-NEXT: store i32 [[INC20]], ptr [[I15]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP6:![0-9]+]] +// SIMD-ONLY: for.end21: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I22]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND23:%.*]] +// SIMD-ONLY: for.cond23: +// SIMD-ONLY-NEXT: [[TMP20:%.*]] = load i32, ptr [[I22]], align 4 +// SIMD-ONLY-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP24:%.*]] = icmp ult i32 [[TMP20]], [[TMP21]] +// SIMD-ONLY-NEXT: br i1 [[CMP24]], label [[FOR_BODY25:%.*]], label [[FOR_END28:%.*]] +// SIMD-ONLY: for.body25: +// SIMD-ONLY-NEXT: [[TMP22:%.*]] = load i32, ptr [[I22]], align 4 +// SIMD-ONLY-NEXT: [[TMP23:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP22]], ptr [[TMP23]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC26:%.*]] +// SIMD-ONLY: for.inc26: +// SIMD-ONLY-NEXT: [[TMP24:%.*]] = load i32, ptr [[I22]], align 4 +// SIMD-ONLY-NEXT: [[INC27:%.*]] = add i32 [[TMP24]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC27]], ptr [[I22]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND23]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY: for.end28: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I29]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND30:%.*]] +// SIMD-ONLY: for.cond30: +// SIMD-ONLY-NEXT: [[TMP25:%.*]] = load i32, ptr [[I29]], align 4 +// SIMD-ONLY-NEXT: [[TMP26:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP25]], [[TMP26]] +// SIMD-ONLY-NEXT: br i1 [[CMP31]], label [[FOR_BODY32:%.*]], label [[FOR_END35:%.*]] +// SIMD-ONLY: for.body32: +// SIMD-ONLY-NEXT: [[TMP27:%.*]] = load i32, ptr [[I29]], align 4 +// SIMD-ONLY-NEXT: [[TMP28:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP27]], ptr [[TMP28]]) +// SIMD-ONLY-NEXT: br label 
[[FOR_INC33:%.*]] +// SIMD-ONLY: for.inc33: +// SIMD-ONLY-NEXT: [[TMP29:%.*]] = load i32, ptr [[I29]], align 4 +// SIMD-ONLY-NEXT: [[INC34:%.*]] = add i32 [[TMP29]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC34]], ptr [[I29]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]] +// SIMD-ONLY: for.end35: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I36]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND37:%.*]] +// SIMD-ONLY: for.cond37: +// SIMD-ONLY-NEXT: [[TMP30:%.*]] = load i32, ptr [[I36]], align 4 +// SIMD-ONLY-NEXT: [[TMP31:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP38:%.*]] = icmp ult i32 [[TMP30]], [[TMP31]] +// SIMD-ONLY-NEXT: br i1 [[CMP38]], label [[FOR_BODY39:%.*]], label [[FOR_END42:%.*]] +// SIMD-ONLY: for.body39: +// SIMD-ONLY-NEXT: [[TMP32:%.*]] = load i32, ptr [[I36]], align 4 +// SIMD-ONLY-NEXT: [[TMP33:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP32]], ptr [[TMP33]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC40:%.*]] +// SIMD-ONLY: for.inc40: +// SIMD-ONLY-NEXT: [[TMP34:%.*]] = load i32, ptr [[I36]], align 4 +// SIMD-ONLY-NEXT: [[INC41:%.*]] = add i32 [[TMP34]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC41]], ptr [[I36]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND37]], !llvm.loop [[LOOP9:![0-9]+]] +// SIMD-ONLY: for.end42: +// SIMD-ONLY-NEXT: store i32 0, ptr [[I43]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND44:%.*]] +// SIMD-ONLY: for.cond44: +// SIMD-ONLY-NEXT: [[TMP35:%.*]] = load i32, ptr [[I43]], align 4 +// SIMD-ONLY-NEXT: [[TMP36:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// SIMD-ONLY-NEXT: [[CMP45:%.*]] = icmp ult i32 [[TMP35]], [[TMP36]] +// SIMD-ONLY-NEXT: br i1 [[CMP45]], label [[FOR_BODY46:%.*]], label [[FOR_END49:%.*]] +// SIMD-ONLY: for.body46: +// SIMD-ONLY-NEXT: [[TMP37:%.*]] = load i32, ptr [[I43]], align 4 +// SIMD-ONLY-NEXT: [[TMP38:%.*]] = load ptr, ptr [[POINTS_ADDR]], align 
8 +// SIMD-ONLY-NEXT: call void @_Z4workR5PointiPKS_(ptr nonnull align 4 dereferenceable(8) [[RED]], i32 [[TMP37]], ptr [[TMP38]]) +// SIMD-ONLY-NEXT: br label [[FOR_INC47:%.*]] +// SIMD-ONLY: for.inc47: +// SIMD-ONLY-NEXT: [[TMP39:%.*]] = load i32, ptr [[I43]], align 4 +// SIMD-ONLY-NEXT: [[INC48:%.*]] = add i32 [[TMP39]], 1 +// SIMD-ONLY-NEXT: store i32 [[INC48]], ptr [[I43]], align 4 +// SIMD-ONLY-NEXT: br label [[FOR_COND44]], !llvm.loop [[LOOP10:![0-9]+]] +// SIMD-ONLY: for.end49: +// SIMD-ONLY-NEXT: ret void +// +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC1Ev +// SIMD-ONLY-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: call void @_ZN5PointC2Ev(ptr nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]] +// SIMD-ONLY-NEXT: ret void +// +// +// SIMD-ONLY-LABEL: define {{[^@]+}}@_ZN5PointC2Ev +// SIMD-ONLY-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// SIMD-ONLY-NEXT: entry: +// SIMD-ONLY-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// SIMD-ONLY-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// SIMD-ONLY-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_POINT:%.*]], ptr [[THIS1]], i32 0, i32 0 +// SIMD-ONLY-NEXT: store i32 0, ptr [[X]], align 4 +// SIMD-ONLY-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_POINT]], ptr [[THIS1]], i32 0, i32 1 +// SIMD-ONLY-NEXT: store i32 0, ptr [[Y]], align 4 +// SIMD-ONLY-NEXT: ret void +// diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ 
b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -101,17 +101,23 @@ // CHECK-SAME: (ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8 +// CHECK-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false, i1 true) // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK: user_code.entry: // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 -// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[E_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8 +// CHECK-NEXT: store [[STRUCT_ANON]] [[TMP6]], ptr [[TMP5]], align 8 +// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr 
null, ptr [[TMP5]]) +// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[TMP4]], i64 8) // CHECK-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2, i1 true) // CHECK-NEXT: ret void // CHECK: worker.exit: @@ -119,44 +125,46 @@ // // // CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 -// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[E:%.*]] = alloca double, align 8 // CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 0 -// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 0 -// CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 -// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr 
getelementptr (double, ptr null, i32 1) to i64) -// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2]], i64 [[TMP6]] -// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP8]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK-NEXT: store ptr [[E2]], ptr [[TMP11]], align 8 -// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB2]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) +// CHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0 +// CHECK-NEXT: store double 0.000000e+00, ptr [[E]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[E]], i64 [[TMP8]] +// CHECK-NEXT: store ptr [[TMP9]], ptr [[TMP]], align 8 +// CHECK-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK-NEXT: store ptr [[E]], ptr [[TMP13]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB2]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func) // CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 // CHECK-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK: .omp.reduction.then: // CHECK-NEXT: [[TMP16:%.*]] = load double, ptr [[ARRAYIDX]], align 8 -// CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[E2]], align 8 +// CHECK-NEXT: [[TMP17:%.*]] = load double, ptr [[E]], align 8 // CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] // CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX]], align 8 -// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP10]]) +// CHECK-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP12]]) // CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK: .omp.reduction.done: // CHECK-NEXT: ret void @@ -176,51 +184,51 @@ // CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 // CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 // CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP9]], 
align 8 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 1 -// CHECK-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP11]], align 8 -// CHECK-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 -// CHECK-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) -// CHECK-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr i64, ptr [[TMP11]], i64 1 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP12]], align 8 -// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1 +// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() +// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 +// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) +// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1 
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 +// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] +// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 +// CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 +// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 +// CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] +// CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 // CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 -// CHECK-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] -// CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] -// CHECK-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] -// CHECK-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] +// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] +// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK: then: // CHECK-NEXT: call void @"_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: br label [[IFCONT:%.*]] // CHECK: else: // CHECK-NEXT: br label [[IFCONT]] // CHECK: ifcont: -// CHECK-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] -// CHECK-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK-NEXT: 
[[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 +// CHECK-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] +// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK: then4: -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 -// CHECK-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = load double, ptr [[TMP43]], align 8 -// CHECK-NEXT: store double [[TMP47]], ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 // CHECK-NEXT: br label [[IFCONT6:%.*]] // CHECK: else5: // CHECK-NEXT: br label [[IFCONT6]] @@ -246,41 +254,41 @@ // CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 // CHECK-NEXT: br label [[PRECOND:%.*]] // CHECK: precond: -// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 -// CHECK-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2 +// CHECK-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK: body: // CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 
// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK: then: -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 -// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 -// CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP11]], i32 [[TMP8]] -// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK-NEXT: store volatile i32 [[TMP15]], ptr addrspace(3) [[TMP14]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4 // CHECK-NEXT: br label [[IFCONT:%.*]] // CHECK: else: // CHECK-NEXT: br label [[IFCONT]] // CHECK: ifcont: // CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP2]]) -// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] +// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4 +// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]] // CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] // CHECK: then2: -// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 
-// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP19]], i32 [[TMP8]] -// CHECK-NEXT: [[TMP22:%.*]] = load volatile i32, ptr addrspace(3) [[TMP17]], align 4 -// CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0 +// CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]] +// CHECK-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4 +// CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 // CHECK-NEXT: br label [[IFCONT4:%.*]] // CHECK: else3: // CHECK-NEXT: br label [[IFCONT4]] // CHECK: ifcont4: -// CHECK-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK-NEXT: store i32 [[TMP23]], ptr [[DOTCNT_ADDR]], align 4 +// CHECK-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4 // CHECK-NEXT: br label [[PRECOND]] // CHECK: exit: // CHECK-NEXT: ret void @@ -310,68 +318,68 @@ // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[O]], i64 0, i64 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[O]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], 
align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], 
align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50(ptr [[O]]) #[[ATTR8:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK1-NEXT: 
store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr 
[[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// 
CHECK1-NEXT: store i64 0, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK1: omp_offload.failed5: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55(ptr [[B]]) #[[ATTR8]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -393,32 +401,37 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(20) [[O:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[O1:%.*]] = alloca [[CLASS_S2:%.*]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[O:%.*]] = alloca [[CLASS_S2:%.*]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP0]], i64 0, i64 0 -// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr ([[CLASS_S2]], ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[CLASS_S2]], ptr [[O1]], i64 [[TMP5]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr ([[CLASS_S2]], ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr [[CLASS_S2]], ptr [[O]], i64 [[TMP6]] // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: @@ -434,26 +447,26 @@ // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: for.end: // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[O1]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[O]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP13]], label 
[[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O]]) // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP13]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP12]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O1]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL2]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP19]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL1:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX]], ptr noundef nonnull align 4 dereferenceable(4) [[O]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX]], ptr align 4 [[CALL1]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP15]], ptr @.gomp_critical_user_.atomic_reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -467,13 +480,13 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP10]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr 
@_ZN2S2plERS_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP7]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP5]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[CALL]], i64 4, i1 false) // CHECK1-NEXT: ret void // // @@ -481,18 +494,21 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8000) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -507,176 +523,178 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX1]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY2]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX4]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY5]], i64 5 // CHECK1-NEXT: [[ARRAYDECAY7:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX6]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY7]], i64 1 -// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 -// CHECK1-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP3:%.*]] = sub i64 [[TMP1]], [[TMP2]] -// CHECK1-NEXT: [[TMP4:%.*]] = sdiv exact i64 [[TMP3]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP5:%.*]] = add nuw i64 [[TMP4]], 1 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP5]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[VLA]], i64 
[[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP8]] +// CHECK1-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP7:%.*]] = add nuw i64 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP7]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 
[[TMP10]], [[TMP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP13]] +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP2]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[VLA]], i64 [[TMP14]] // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [1 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX9]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY10]], i64 2 // CHECK1-NEXT: [[ARRAYDECAY12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY12]], i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP0]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x double]]], ptr [[TMP2]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYDECAY15:%.*]] = getelementptr inbounds [10 x [10 x double]], ptr [[ARRAYIDX14]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYDECAY15]], i64 5 // 
CHECK1-NEXT: [[ARRAYDECAY17:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX16]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYDECAY17]], i64 1 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX18]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64 -// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP24:%.*]] = add nuw i64 [[TMP23]], 1 -// CHECK1-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP24]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load 
i32, ptr [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP32]], i32 1, i32 1, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP34]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP36]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP37]], 9 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX13]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX18]] to i64 +// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX13]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = add nuw i64 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB2]], i32 [[TMP30]], i32 1, i32 1, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP34]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP38]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP35]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP39]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: 
store i64 [[TMP36]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sle i64 [[TMP37]], [[TMP38]] // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP42]], 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP39]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP43]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP40]], 1 // CHECK1-NEXT: store i64 [[ADD20]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP45]]) -// 
CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP47]], i32 1) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP51:%.*]] = inttoptr i64 [[TMP5]] to ptr -// CHECK1-NEXT: store ptr [[TMP51]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP55]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP42]]) +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB2]], i32 [[TMP44]], i32 1) +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP47:%.*]] = inttoptr i64 [[TMP7]] to ptr +// CHECK1-NEXT: store ptr [[TMP47]], ptr [[TMP46]], align 8 +// 
CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK1-NEXT: [[TMP50:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP49]], i32 1, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP50]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP56]] +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP51]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP57:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP57]], [[TMP58]] +// CHECK1-NEXT: [[TMP52:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD22:%.*]] = fadd double [[TMP52]], [[TMP53]] // CHECK1-NEXT: store double [[ADD22]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST21]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT23]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP56]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT23]], [[TMP51]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done25: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP53]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB1]], i32 [[TMP49]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP59]] +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr double, ptr [[ARRAYIDX3]], i64 [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY26:%.*]] = icmp eq ptr [[ARRAYIDX3]], [[TMP54]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY26]], label [[OMP_ARRAYCPY_DONE33:%.*]], label [[OMP_ARRAYCPY_BODY27:%.*]] // CHECK1: omp.arraycpy.body27: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST28:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT31:%.*]], [[OMP_ARRAYCPY_BODY27]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST29:%.*]] = phi ptr [ [[ARRAYIDX3]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT30:%.*]], [[OMP_ARRAYCPY_BODY27]] ] -// CHECK1-NEXT: [[TMP60:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP60]] 
monotonic, align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = atomicrmw fadd ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], double [[TMP55]] monotonic, align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT30]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST29]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT31]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST28]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP59]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE32:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT30]], [[TMP54]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_BODY27]] // CHECK1: omp.arraycpy.done33: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP62:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP62]]) +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP57]]) // CHECK1-NEXT: ret void // // @@ -687,17 +705,17 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 [[TMP5]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP3]], [[TMP6]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = 
load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store double 0.000000e+00, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -712,21 +730,21 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[TMP5]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP5]], [[TMP8]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP7]], [[TMP8]] // CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -740,27 +758,27 @@ // CHECK1-NEXT: 
store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP10]], i64 [[TMP14]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP7]], i64 [[TMP10]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP16:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] // CHECK1-NEXT: store double [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -844,225 +862,225 @@ // CHECK2-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK2-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 24, i1 false) -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP12]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP14]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK2-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP19]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK2-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK2-NEXT: store 
ptr [[ARRAYIDX1]], ptr [[TMP24]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 -// CHECK2-NEXT: store i64 [[TMP10]], ptr [[TMP26]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 2 +// CHECK2-NEXT: store i64 [[TMP10]], ptr [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr null, ptr [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load 
i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK2-NEXT: [[TMP34:%.*]] = zext i32 [[ADD]] to i64 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], 1 +// CHECK2-NEXT: [[TMP27:%.*]] = zext i32 [[ADD]] to i64 // CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK2-NEXT: store i32 3, ptr [[TMP36]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP37]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP29]], ptr [[TMP38]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP39]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK2-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 -// 
CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK2-NEXT: store ptr null, ptr [[TMP41]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK2-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK2-NEXT: store i64 [[TMP34]], ptr [[TMP43]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, ptr [[KERNEL_ARGS]]) -// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK2-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP30]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes, ptr 
[[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 [[TMP27]], ptr [[TMP36]], align 8 +// CHECK2-NEXT: [[TMP37:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK2-NEXT: br i1 [[TMP38]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK2: omp_offload.failed: // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l69(i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK2: omp_offload.cont: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP46]], ptr [[SIZE_CASTED4]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[SIZE_CASTED4]], align 4 -// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP49:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP50:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP51]], i32 0 -// CHECK2-NEXT: [[TMP52:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP53]], i32 0 -// 
CHECK2-NEXT: [[TMP54:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = mul nuw i32 [[TMP54]], 4 -// CHECK2-NEXT: [[TMP56:%.*]] = sext i32 [[TMP55]] to i64 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP39]], ptr [[SIZE_CASTED4]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[SIZE_CASTED4]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP44]], i32 0 +// CHECK2-NEXT: [[TMP45:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP46]], i32 0 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 4 +// CHECK2-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 // CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES10]], ptr align 4 @.offload_sizes.7, i32 24, i1 false) -// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP47]], ptr [[TMP58]], align 4 -// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP47]], ptr [[TMP60]], align 4 -// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 -// CHECK2-NEXT: store ptr null, ptr [[TMP62]], align 4 -// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP50]], ptr [[TMP63]], align 4 -// 
CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[ARRAYIDX5]], ptr [[TMP65]], align 4 -// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1 -// CHECK2-NEXT: store ptr null, ptr [[TMP67]], align 4 -// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP52]], ptr [[TMP68]], align 4 -// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP70]], align 4 -// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 2 -// CHECK2-NEXT: store i64 [[TMP56]], ptr [[TMP72]], align 4 -// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2 -// CHECK2-NEXT: store ptr null, ptr [[TMP73]], align 4 -// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP77:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP77]], ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK2-NEXT: [[TMP78:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 -// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP78]], 0 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP40]], ptr [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP40]], ptr [[TMP51]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP43]], ptr [[TMP53]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARRAYIDX5]], ptr [[TMP54]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 1 +// CHECK2-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP45]], ptr [[TMP56]], align 4 +// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[ARRAYIDX6]], ptr [[TMP57]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 2 +// CHECK2-NEXT: store i64 [[TMP49]], ptr [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 2 +// CHECK2-NEXT: store ptr null, ptr [[TMP59]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES10]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP63]], ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK2-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_12]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP64]], 0 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 // CHECK2-NEXT: [[SUB16:%.*]] = sub nsw 
i32 [[DIV15]], 1 // CHECK2-NEXT: store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4 -// CHECK2-NEXT: [[TMP79:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP79]], 1 -// CHECK2-NEXT: [[TMP80:%.*]] = zext i32 [[ADD17]] to i64 +// CHECK2-NEXT: [[TMP65:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP65]], 1 +// CHECK2-NEXT: [[TMP66:%.*]] = zext i32 [[ADD17]] to i64 // CHECK2-NEXT: [[KERNEL_ARGS18:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK2-NEXT: store i32 1, ptr [[TMP81]], align 4 -// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 -// CHECK2-NEXT: store i32 3, ptr [[TMP82]], align 4 -// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP74]], ptr [[TMP83]], align 4 -// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP75]], ptr [[TMP84]], align 4 -// CHECK2-NEXT: [[TMP85:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 -// CHECK2-NEXT: store ptr [[TMP76]], ptr [[TMP85]], align 4 -// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 -// CHECK2-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP86]], align 4 -// CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 -// CHECK2-NEXT: store ptr null, ptr [[TMP87]], align 4 -// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, 
i32 7 -// CHECK2-NEXT: store ptr null, ptr [[TMP88]], align 4 -// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 -// CHECK2-NEXT: store i64 [[TMP80]], ptr [[TMP89]], align 8 -// CHECK2-NEXT: [[TMP90:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, ptr [[KERNEL_ARGS18]]) -// CHECK2-NEXT: [[TMP91:%.*]] = icmp ne i32 [[TMP90]], 0 -// CHECK2-NEXT: br i1 [[TMP91]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] +// CHECK2-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 +// CHECK2-NEXT: store i32 1, ptr [[TMP67]], align 4 +// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, ptr [[TMP68]], align 4 +// CHECK2-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP60]], ptr [[TMP69]], align 4 +// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP61]], ptr [[TMP70]], align 4 +// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 4 +// CHECK2-NEXT: store ptr [[TMP62]], ptr [[TMP71]], align 4 +// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP72]], align 4 +// CHECK2-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP73]], align 4 +// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP74]], align 4 +// CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 8 +// CHECK2-NEXT: store i64 [[TMP66]], ptr [[TMP75]], align 8 +// CHECK2-NEXT: [[TMP76:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73.region_id, ptr [[KERNEL_ARGS18]]) +// CHECK2-NEXT: [[TMP77:%.*]] = icmp ne i32 [[TMP76]], 0 +// CHECK2-NEXT: br i1 [[TMP77]], label [[OMP_OFFLOAD_FAILED19:%.*]], label [[OMP_OFFLOAD_CONT20:%.*]] // CHECK2: omp_offload.failed19: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP47]], ptr [[TMP48]], ptr [[TMP49]]) #[[ATTR2]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l73(i32 [[TMP40]], ptr [[TMP41]], ptr [[TMP42]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT20]] // CHECK2: omp_offload.cont20: -// CHECK2-NEXT: [[TMP92:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP92]], ptr [[SIZE_CASTED21]], align 4 -// CHECK2-NEXT: [[TMP93:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4 +// CHECK2-NEXT: [[TMP78:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP78]], ptr [[SIZE_CASTED21]], align 4 +// CHECK2-NEXT: [[TMP79:%.*]] = load i32, ptr [[SIZE_CASTED21]], align 4 // CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP93]], ptr [[TMP94]], align 4 -// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP93]], ptr [[TMP96]], align 4 -// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], 
ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0 -// CHECK2-NEXT: store ptr null, ptr [[TMP98]], align 4 -// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[A]], ptr [[TMP99]], align 4 -// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[ARRAYIDX22]], ptr [[TMP101]], align 4 -// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1 -// CHECK2-NEXT: store ptr null, ptr [[TMP103]], align 4 -// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP79]], ptr [[TMP80]], align 4 +// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP79]], ptr [[TMP81]], align 4 +// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP82]], align 4 +// CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP83]], align 4 +// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARRAYIDX22]], ptr [[TMP84]], align 4 +// CHECK2-NEXT: [[TMP85:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS25]], i32 0, i32 1 +// CHECK2-NEXT: store ptr null, ptr [[TMP85]], align 4 +// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP87:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK2-NEXT: [[KERNEL_ARGS26:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK2-NEXT: store i32 1, ptr [[TMP106]], align 4 -// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 -// CHECK2-NEXT: store i32 2, ptr [[TMP107]], align 4 -// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP104]], ptr [[TMP108]], align 4 -// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP105]], ptr [[TMP109]], align 4 -// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 4 -// CHECK2-NEXT: store ptr @.offload_sizes.11, ptr [[TMP110]], align 4 -// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 5 -// CHECK2-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP111]], align 4 -// CHECK2-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 6 -// CHECK2-NEXT: store ptr null, ptr [[TMP112]], align 4 -// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 7 -// CHECK2-NEXT: store ptr null, ptr [[TMP113]], align 4 -// CHECK2-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 8 -// CHECK2-NEXT: store i64 0, ptr [[TMP114]], align 8 -// CHECK2-NEXT: [[TMP115:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, ptr [[KERNEL_ARGS26]]) -// CHECK2-NEXT: [[TMP116:%.*]] = icmp ne i32 [[TMP115]], 0 -// CHECK2-NEXT: br i1 [[TMP116]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]] +// CHECK2-NEXT: [[TMP88:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 +// CHECK2-NEXT: store i32 1, ptr [[TMP88]], align 4 +// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, ptr [[TMP89]], align 4 +// CHECK2-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP86]], ptr [[TMP90]], align 4 +// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP87]], ptr [[TMP91]], align 4 +// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 4 +// CHECK2-NEXT: store ptr @.offload_sizes.11, ptr [[TMP92]], align 4 +// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP93]], align 4 +// CHECK2-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP94]], align 4 +// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP95]], align 4 +// CHECK2-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, ptr [[TMP96]], align 8 +// CHECK2-NEXT: [[TMP97:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78.region_id, ptr [[KERNEL_ARGS26]]) +// CHECK2-NEXT: [[TMP98:%.*]] = icmp ne i32 [[TMP97]], 0 +// CHECK2-NEXT: br i1 [[TMP98]], label [[OMP_OFFLOAD_FAILED27:%.*]], label [[OMP_OFFLOAD_CONT28:%.*]] // CHECK2: omp_offload.failed27: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP93]], ptr [[A]]) #[[ATTR2]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l78(i32 [[TMP79]], ptr [[A]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT28]] // CHECK2: omp_offload.cont28: -// CHECK2-NEXT: [[TMP117:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP117]], ptr [[SIZE_CASTED29]], align 4 -// CHECK2-NEXT: [[TMP118:%.*]] = load i32, ptr [[SIZE_CASTED29]], align 4 +// CHECK2-NEXT: [[TMP99:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP99]], ptr [[SIZE_CASTED29]], align 4 +// CHECK2-NEXT: [[TMP100:%.*]] = load i32, ptr [[SIZE_CASTED29]], align 4 // CHECK2-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds [10 x i32], ptr [[A]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP118]], ptr [[TMP119]], align 4 -// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP118]], ptr [[TMP121]], align 4 -// CHECK2-NEXT: [[TMP123:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 0 -// CHECK2-NEXT: store ptr null, ptr [[TMP123]], align 4 -// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[A]], ptr [[TMP124]], align 4 -// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 1 -// 
CHECK2-NEXT: store ptr [[ARRAYIDX30]], ptr [[TMP126]], align 4 -// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 1 -// CHECK2-NEXT: store ptr null, ptr [[TMP128]], align 4 -// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP130:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP100]], ptr [[TMP101]], align 4 +// CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0 +// CHECK2-NEXT: store i32 [[TMP100]], ptr [[TMP102]], align 4 +// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 0 +// CHECK2-NEXT: store ptr null, ptr [[TMP103]], align 4 +// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP104]], align 4 +// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[ARRAYIDX30]], ptr [[TMP105]], align 4 +// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS33]], i32 0, i32 1 +// CHECK2-NEXT: store ptr null, ptr [[TMP106]], align 4 +// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0 // CHECK2-NEXT: [[KERNEL_ARGS34:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK2-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0 -// CHECK2-NEXT: store i32 1, ptr [[TMP131]], align 4 -// CHECK2-NEXT: 
[[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1 -// CHECK2-NEXT: store i32 2, ptr [[TMP132]], align 4 -// CHECK2-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[TMP129]], ptr [[TMP133]], align 4 -// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 3 -// CHECK2-NEXT: store ptr [[TMP130]], ptr [[TMP134]], align 4 -// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 4 -// CHECK2-NEXT: store ptr @.offload_sizes.15, ptr [[TMP135]], align 4 -// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 5 -// CHECK2-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP136]], align 4 -// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 6 -// CHECK2-NEXT: store ptr null, ptr [[TMP137]], align 4 -// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 7 -// CHECK2-NEXT: store ptr null, ptr [[TMP138]], align 4 -// CHECK2-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 8 -// CHECK2-NEXT: store i64 0, ptr [[TMP139]], align 8 -// CHECK2-NEXT: [[TMP140:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, ptr [[KERNEL_ARGS34]]) -// CHECK2-NEXT: [[TMP141:%.*]] = icmp ne i32 [[TMP140]], 0 -// CHECK2-NEXT: br i1 [[TMP141]], label [[OMP_OFFLOAD_FAILED35:%.*]], label [[OMP_OFFLOAD_CONT36:%.*]] +// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0 +// 
CHECK2-NEXT: store i32 1, ptr [[TMP109]], align 4 +// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, ptr [[TMP110]], align 4 +// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP107]], ptr [[TMP111]], align 4 +// CHECK2-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP108]], ptr [[TMP112]], align 4 +// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 4 +// CHECK2-NEXT: store ptr @.offload_sizes.15, ptr [[TMP113]], align 4 +// CHECK2-NEXT: [[TMP114:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 5 +// CHECK2-NEXT: store ptr @.offload_maptypes.16, ptr [[TMP114]], align 4 +// CHECK2-NEXT: [[TMP115:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 6 +// CHECK2-NEXT: store ptr null, ptr [[TMP115]], align 4 +// CHECK2-NEXT: [[TMP116:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 7 +// CHECK2-NEXT: store ptr null, ptr [[TMP116]], align 4 +// CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, ptr [[TMP117]], align 8 +// CHECK2-NEXT: [[TMP118:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81.region_id, ptr [[KERNEL_ARGS34]]) +// CHECK2-NEXT: [[TMP119:%.*]] = icmp ne i32 [[TMP118]], 0 +// CHECK2-NEXT: br i1 [[TMP119]], label [[OMP_OFFLOAD_FAILED35:%.*]], label [[OMP_OFFLOAD_CONT36:%.*]] // CHECK2: omp_offload.failed35: -// CHECK2-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP118]], ptr [[A]]) #[[ATTR2]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3sumPiiS__l81(i32 [[TMP100]], ptr [[A]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT36]] // CHECK2: omp_offload.cont36: // CHECK2-NEXT: ret void @@ -1074,155 +1092,175 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[OUTPUT1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 
// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK2-NEXT: store i32 0, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[OUTPUT1]], i64 [[TMP5]] -// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, 
i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +// CHECK2-NEXT: store i32 0, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP10]] +// CHECK2-NEXT: store ptr [[TMP11]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP14]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP17]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// 
CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP21]], i32 [[TMP23]], ptr [[TMP24]], ptr [[TMP25]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK2-NEXT: [[TMP39:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP39]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP40]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT1]], ptr [[TMP30]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP33]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP35]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP41:%.*]] 
= getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP43]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP44]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP33]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] +// CHECK2-NEXT: store i32 [[ADD7]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP43]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OUTPUT1]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP38]] monotonic, align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP47]] monotonic, align 4 // CHECK2-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1232,119 +1270,127 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // 
CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP6]], i32 0 -// CHECK2-NEXT: store i32 0, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK2-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[OUTPUT3]], i64 [[TMP11]] -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[_TMP4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +// CHECK2-NEXT: store i32 0, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP20]] +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[_TMP3]], align 4 +// 
CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 [[TMP24]] -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 [[TMP33]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP35]], i32 0 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP34]] +// CHECK2-NEXT: 
store i32 [[ADD9]], ptr [[ARRAYIDX8]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], 1 +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT3]], ptr [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP36]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP40]], 
align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP42]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP43]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP34]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP42]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[OUTPUT3]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP39]] monotonic, align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[OUTPUT]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP46]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1360,15 +1406,15 @@ // 
CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK2-NEXT: ret void // // @@ -1380,15 +1426,15 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK2-NEXT: ret void // // @@ -1398,189 +1444,209 @@ // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 // CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[OUTPUT2:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 // 
CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr 
[[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP2]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP2]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label 
[[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP6:%.*]] = sub i64 [[TMP4]], [[TMP5]] -// CHECK2-NEXT: [[TMP7:%.*]] = sdiv exact i64 [[TMP6]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[OUTPUT2]], i64 [[TMP7]] -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64 +// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP12]] +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], 
align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// 
CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..4, i32 [[TMP23]], i32 [[TMP24]], i32 [[TMP26]], ptr [[TMP27]], ptr [[TMP28]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK2-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK2-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP42]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT2]], ptr [[TMP33]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP36]], i32 1, 
i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP38]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP45]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP46]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP47]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK2-NEXT: store i32 [[ADD10]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: store i32 [[ADD9]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP39]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done13: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP36]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP47]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done12: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP45]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP42]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY14]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY15:%.*]] -// CHECK2: omp.arraycpy.body15: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST16:%.*]] = phi ptr [ [[OUTPUT2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST17:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY15]] ] -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 [[TMP43]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST17]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT19]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST16]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP42]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY15]] -// CHECK2: omp.arraycpy.done21: +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY13:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY13]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY14:%.*]] +// CHECK2: omp.arraycpy.body14: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST15:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT17:%.*]], [[OMP_ARRAYCPY_BODY14]] ] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 [[TMP51]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT17]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT18]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST15]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT17]], [[TMP50]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY14]] +// CHECK2: omp.arraycpy.done20: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef [[OUTPUT:%.*]], ptr noundef [[INPUT:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[INPUT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1590,153 +1656,161 @@ // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[OUTPUT4:%.*]] = alloca [3 x i32], align 4 -// CHECK2-NEXT: [[_TMP5:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OUTPUT:%.*]] = alloca [3 x i32], align 4 +// CHECK2-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[INPUT]], ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP8]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 2 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [3 x i32], ptr [[OUTPUT]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label 
[[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_PRECOND_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[OUTPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[OUTPUT4]], i64 [[TMP13]] -// CHECK2-NEXT: store ptr [[TMP15]], ptr [[_TMP5]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP7]], align 4 
+// CHECK2-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 +// CHECK2-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[OUTPUT]], i64 [[TMP22]] +// CHECK2-NEXT: store ptr [[TMP23]], ptr [[_TMP4]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP30]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP33]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[I6]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[INPUT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[I6]], align 4 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 [[TMP27]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP5]], align 4 -// CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, ptr [[TMP29]], i32 0 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX10]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP30]], [[TMP28]] -// CHECK2-NEXT: store i32 [[ADD11]], ptr [[ARRAYIDX10]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[I5]], align 4 +// 
CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 [[TMP35]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, ptr [[TMP37]], i32 0 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX9]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP39]], 1 +// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[OUTPUT4]], ptr [[TMP34]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP37]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// 
CHECK2-NEXT: switch i32 [[TMP39]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[OUTPUT]], ptr [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP44]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP45]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP46]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE16:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST13:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: store i32 [[ADD14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST13]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[OUTPUT]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT14:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] +// CHECK2-NEXT: store i32 [[ADD13]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT14]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP40]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done17: -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP37]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE15:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT14]], [[TMP46]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE15]], label [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: 
omp.arraycpy.done16: +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP44]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP43]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE25:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] -// CHECK2: omp.arraycpy.body19: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[OUTPUT4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT23:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY19]] ] -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 [[TMP44]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT23]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE24:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT22]], [[TMP43]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_DONE25]], label [[OMP_ARRAYCPY_BODY19]] -// CHECK2: omp.arraycpy.done25: +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY17:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY17]], label [[OMP_ARRAYCPY_DONE24:%.*]], label [[OMP_ARRAYCPY_BODY18:%.*]] +// CHECK2: omp.arraycpy.body18: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST19:%.*]] = phi ptr [ [[OUTPUT]], 
[[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT22:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST20:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT21:%.*]], [[OMP_ARRAYCPY_BODY18]] ] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 [[TMP50]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT21]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST20]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT22]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST19]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE23:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT21]], [[TMP49]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE23]], label [[OMP_ARRAYCPY_DONE24]], label [[OMP_ARRAYCPY_BODY18]] +// CHECK2: omp.arraycpy.done24: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1752,24 +1826,24 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 
4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK2: omp.arraycpy.done2: // CHECK2-NEXT: ret void @@ -1783,24 +1857,24 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 3 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 3 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK2: omp.arraycpy.done2: // CHECK2-NEXT: ret void @@ -1811,106 +1885,112 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// 
CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined..9, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A2:%.*]] = alloca [2 x i32], align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// 
CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 1 -// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP1]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK2: omp.arrayinit.body: // CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK2-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP1]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK2: omp.arrayinit.done: -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]] -// CHECK2-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A2]], i64 [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP8:%.*]] = sub i64 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP9:%.*]] = sdiv exact i64 [[TMP8]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]] // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A2]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP14]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) -// CHECK2-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.10, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP19]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP18]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[A]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST2:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT3:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT3]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST2]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP19]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK2: omp.arraycpy.done6: +// 
CHECK2-NEXT: [[OMP_ARRAYCPY_DONE4:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT3]], [[TMP18]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP16]], ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK2: omp.arraycpy.body8: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[A2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP23]] monotonic, align 4 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP22]] -// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK2: omp.arraycpy.done14: +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[ARRAYIDX]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY6:%.*]] = icmp eq ptr [[ARRAYIDX]], [[TMP21]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY6]], label 
[[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY7:%.*]] +// CHECK2: omp.arraycpy.body7: +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST8:%.*]] = phi ptr [ [[A]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST9:%.*]] = phi ptr [ [[ARRAYIDX]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY7]] ] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 [[TMP22]] monotonic, align 4 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST9]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST8]], i32 1 +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE12:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT10]], [[TMP21]] +// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY7]] +// CHECK2: omp.arraycpy.done13: // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: // CHECK2-NEXT: ret void @@ -1924,24 +2004,24 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 2 -// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2 +// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK2: omp.arraycpy.body: -// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK2-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK2-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK2: omp.arraycpy.done2: // CHECK2-NEXT: ret void @@ -1952,72 +2032,78 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined..13, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[SIZE:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 3 -// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64 -// CHECK2-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr 
[[A1]], i64 [[TMP5]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 3 +// CHECK2-NEXT: store i32 0, ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP4]] to i64 +// CHECK2-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = sub i64 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP8:%.*]] = sdiv exact i64 [[TMP7]], ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP8]] // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIZE]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK2-NEXT: store 
i32 [[INC]], ptr [[I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: for.end: -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP11]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP14]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP15]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.14, ptr @.gomp_critical_user_.reduction.var) // CHECK2-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK2-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK2-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK2-NEXT: ] // CHECK2: .omp.reduction.case1: // CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) +// CHECK2-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP15]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.case2: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[ARRAYIDX]], i32 [[TMP19]] monotonic, align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK2: .omp.reduction.default: @@ -2032,15 +2118,15 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK2-NEXT: ret void // // @@ -2056,8 +2142,8 @@ // CHECK2-NEXT: [[CALL:%.*]] = call noalias noundef nonnull ptr @_Znaj(i32 noundef 400) 
#[[ATTR10:[0-9]+]] // CHECK2-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 4 // CHECK2-NEXT: store i32 0, ptr [[RESULT]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAY]], align 4 -// CHECK2-NEXT: call void @_Z3sumPiiS_(ptr noundef [[TMP1]], i32 noundef 100, ptr noundef [[RESULT]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAY]], align 4 +// CHECK2-NEXT: call void @_Z3sumPiiS_(ptr noundef [[TMP0]], i32 noundef 100, ptr noundef [[RESULT]]) // CHECK2-NEXT: ret i32 0 // // diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -9,12 +9,19 @@ void bar1(void) { #pragma omp parallel // #0 // safe-remark@#0 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#0 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#0 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } void bar2(void) { #pragma omp parallel // #1 // safe-remark@#1 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // all-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // all-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#1 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } @@ -26,10 +33,14 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. 
Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #3 + // all-remark@#3 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#3 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); #pragma omp parallel // #4 + // all-remark@#4 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#4 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } } @@ -41,11 +52,15 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #6 + // all-remark@#6 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#6 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); bar2(); #pragma omp parallel // #7 + // all-remark@#7 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#7 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); @@ -59,11 +74,15 @@ { baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} #pragma omp parallel // #9 + // all-remark@#9 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#9 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} { } bar1(); bar2(); #pragma omp parallel // #10 + // all-remark@#10 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // safe-remark@#10 {{Replaced globalized variable with 1 byte of shared memory. 
[OMP111]}} { } bar1(); diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -6,8 +6,11 @@ void baz(void) __attribute__((assume("omp_no_openmp"))); void bar(void) { -#pragma omp parallel // #1 \ +#pragma omp parallel // #1 // expected-remark@#1 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine. [OMP101]}} + // expected-remark@#1 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#1 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} + { } } @@ -17,11 +20,17 @@ // expected-remark@#2 {{Rewriting generic-mode kernel with a customized state machine. [OMP131]}} { baz(); // expected-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. [OMP121]}} -#pragma omp parallel +#pragma omp parallel // #3 + // expected-remark@#3 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#3 {{Replaced globalized variable with 1 byte of shared memory. [OMP111]}} + { } bar(); -#pragma omp parallel +#pragma omp parallel // #4 + // expected-remark@#4 {{Value has potential side effects preventing SPMD-mode execution. [OMP121]}} + // expected-remark@#4 {{Replaced globalized variable with 1 byte of shared memory. 
[OMP111]}} + { } } diff --git a/clang/test/OpenMP/sections_firstprivate_codegen.cpp b/clang/test/OpenMP/sections_firstprivate_codegen.cpp --- a/clang/test/OpenMP/sections_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_firstprivate_codegen.cpp @@ -397,6 +397,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -405,22 +406,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -489,115 +498,114 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 1 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE7:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE3:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case7: -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR5]], i64 4, i1 false) +// CHECK1: .omp.sections.case3: +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN5]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] 
] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP21]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE6:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done6: +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP26]]) // CHECK1-NEXT: ret void // // @@ -815,73 +823,75 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK3-NEXT: store i32 [[TMP9]], ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 10, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 10, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 +// CHECK3: .omp.sections.case1: +// 
CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -1037,64 +1047,69 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 
+// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP2]], ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 -// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 -// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 1 +// CHECK4-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 1 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 10, ptr [[SIVAR1]], align 4 +// 
CHECK4-NEXT: store i32 10, ptr [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -1106,25 +1121,25 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP14]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: 
[[TMP16:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK4-NEXT: call void [[TMP17]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_lastprivate_codegen.cpp b/clang/test/OpenMP/sections_lastprivate_codegen.cpp --- a/clang/test/OpenMP/sections_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/sections_lastprivate_codegen.cpp @@ -197,6 +197,8 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -206,24 +208,34 @@ // 
CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP6]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -250,42 
+262,40 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: @@ -295,85 +305,85 @@ // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK1-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: store i32 31, ptr [[SIVAR5]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: store i32 31, ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN2]], [[TMP27]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] +// 
CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) // CHECK1-NEXT: ret void // // @@ -388,10 +398,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -400,34 +411,36 @@ // CHECK1-NEXT: [[X:%.*]] = alloca double, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, 
ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP9]], 1.000000e+00 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP10]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[X]], align 8 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: @@ -435,21 +448,21 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC2]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[X]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr @_ZN1A1xE, align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[X]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr @_ZN1A1xE, align 8 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -457,35 +470,44 @@ // CHECK1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr 
[[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define 
{{[^@]+}}@_ZN1SIfEC2Ev @@ -546,126 +568,125 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], 
i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case5: -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = 
icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN3]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done9: -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done5: +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // 
CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) // CHECK1-NEXT: ret void // // @@ -685,7 +706,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, ptr [[F]], align 4 // CHECK1-NEXT: ret void // @@ -698,7 +719,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = 
load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -726,79 +747,81 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.sections.case: // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 13, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store i32 13, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK3: .omp.sections.case2: -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK3: .omp.sections.case1: +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // 
CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[G]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP15]], ptr @g, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[G]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP17]], ptr @g, align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: -// 
CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -821,9 +844,9 @@ // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32 }>, ptr [[BLOCK]], i32 0, i32 5 // CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 // CHECK4-NEXT: store i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK4-NEXT: call void [[TMP4]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void [[TMP2]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: ret i32 0 // // @@ -832,60 +855,65 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr 
[[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 -// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK4-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE2:%.*]] +// CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.sections.case: // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 17, ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 17, ptr [[SIVAR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK4: .omp.sections.case2: +// CHECK4: .omp.sections.case1: // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -897,35 +925,35 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP10:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 
[[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[BLOCK_CAPTURED3]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP12]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[BLOCK_CAPTURED2]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK4-NEXT: call void [[TMP15]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK4-NEXT: br 
i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK4-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP20]], ptr @g, align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], ptr [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP19]], ptr @g, align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[TMP2]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // @@ -952,6 +980,8 @@ // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -961,24 +991,34 @@ // CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK5-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]], ptr @_ZZ4mainE5sivar) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP4]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_1]]) // CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK5-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done1: +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done2: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP6]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ 
-1005,42 +1045,40 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[SIVAR5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: 
[[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1050,85 +1088,85 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP7]], i32 0 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp slt i32 [[TMP13]], 0 +// CHECK5-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[TMP13]], i32 0 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP13]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: store i32 31, ptr [[SIVAR5]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store i32 31, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP20]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN2]], [[TMP27]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN2]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] +// 
CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: +// CHECK5-NEXT: [[CALL5:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP28]], ptr [[TMP10]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP28]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done7: +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP31]]) // CHECK5-NEXT: ret void // // @@ -1143,10 +1181,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1155,74 +1194,76 @@ // CHECK5-NEXT: [[X:%.*]] = alloca [[STRUCT_LASPRIVATE_CONDITIONAL:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 1 -// CHECK5-NEXT: store i8 0, ptr [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1 -// CHECK5-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 1 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 1 +// CHECK5-NEXT: store i8 0, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_LASPRIVATE_CONDITIONAL]], ptr [[X]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = icmp slt i32 [[TMP5]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 1 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP11]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) -// CHECK5-NEXT: [[TMP13:%.*]] = 
load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp sle i32 [[TMP13]], [[TMP12]] -// CHECK5-NEXT: br i1 [[TMP14]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = icmp sle i32 [[TMP14]], [[TMP13]] +// CHECK5-NEXT: br i1 [[TMP15]], label [[LP_COND_THEN:%.*]], label [[LP_COND_EXIT:%.*]] // CHECK5: lp_cond_then: -// CHECK5-NEXT: store i32 [[TMP12]], ptr @.{{pl_cond[.].+[.|,]}} align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP15]], ptr @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store i32 [[TMP13]], ptr @.{{pl_cond[.].+[.|,]}} align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP16]], ptr @{{pl_cond[.].+[.|,]}} align 8 // CHECK5-NEXT: br label [[LP_COND_EXIT]] // CHECK5: lp_cond_exit: -// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP3]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) +// CHECK5-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_{{pl_cond[.].+[.|,]}}var) // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.case1: // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 
[[TMP16]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[INC2]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP3]]) -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP4]]) +// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr @{{pl_cond[.].+[.|,]}} align 8 -// CHECK5-NEXT: store double [[TMP19]], ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK5-NEXT: store double [[TMP20]], ptr @_ZN1A1xE, align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load double, ptr @{{pl_cond[.].+[.|,]}} align 8 +// CHECK5-NEXT: store double [[TMP20]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK5-NEXT: store double [[TMP21]], ptr @_ZN1A1xE, align 8 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // @@ -1230,35 +1271,44 @@ // 
CHECK5-SAME: () #[[ATTR7:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK5: arraydestroy.done1: // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: ret i32 [[TMP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK5-NEXT: ret i32 [[TMP5]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1319,126 +1369,125 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca 
i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = icmp slt i32 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 1 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = icmp slt i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i32 [[TMP11]], i32 1 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], 
align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: switch i32 [[TMP12]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK5-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE5:%.*]] +// CHECK5-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.sections.case: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK5: .omp.sections.case5: -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX6]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5: .omp.sections.case1: +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX2]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label 
[[DOTOMP_SECTIONS_EXIT]] // CHECK5: .omp.sections.exit: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IL_]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[VEC2]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 -// 
CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP23]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN3]], [[TMP25]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[CALL8:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[CALL10:%.*]] = call 
noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN3]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done5: +// CHECK5-NEXT: [[CALL6:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEaSERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] 
+// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP26]]) +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: +// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP28]]) // CHECK5-NEXT: ret void // // @@ -1458,7 +1507,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1471,7 +1520,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void diff --git a/clang/test/OpenMP/sections_private_codegen.cpp b/clang/test/OpenMP/sections_private_codegen.cpp --- a/clang/test/OpenMP/sections_private_codegen.cpp +++ b/clang/test/OpenMP/sections_private_codegen.cpp @@ -113,6 +113,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ -122,23 +123,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void 
@_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load 
i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -165,10 +166,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -181,6 +183,8 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 @@ -196,30 +200,30 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr 
[[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 0 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 @@ -227,28 +231,28 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP14]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -271,6 +275,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -279,22 +284,22 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -355,10 +360,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -370,6 +376,8 @@ // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 @@ -385,31 +393,31 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.case1: // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 @@ -418,28 +426,28 @@ // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // 
CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN3]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP14]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN3]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done4: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP16]]) // CHECK1-NEXT: ret void // // @@ -498,10 +506,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -512,28 +521,30 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -542,22 +553,22 @@ // CHECK3-NEXT: store i32 11, ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP11]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -576,17 +587,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -597,28 +610,30 @@ // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 
[[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label 
[[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -638,25 +653,25 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], ptr [[BLOCK_CAPTURED2]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], ptr [[BLOCK_CAPTURED2]], align 8 // CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK4-NEXT: call void [[TMP14]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK4-NEXT: call void [[TMP13]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add 
nsw i32 [[TMP14]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/sections_reduction_codegen.cpp b/clang/test/OpenMP/sections_reduction_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_codegen.cpp @@ -192,6 +192,7 @@ // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR]], align 4 @@ -202,24 +203,36 @@ // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 6, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -246,197 +259,194 @@ // // // 
CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca float, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca 
[[STRUCT_S]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S]], align 4 -// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[REF_TMP11:%.*]] = alloca [[STRUCT_S]], align 4 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 
0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store float 0.000000e+00, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: 
store float 0.000000e+00, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store float 0x47EFFFFFE0000000, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 0 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 0 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP15]] to i32 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fptosi float [[TMP22]] to i32 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR3]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = fptosi float [[TMP18]] to i32 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: store i32 [[CONV7]], ptr [[ARRAYIDX8]], align 4 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR14]], i64 4, i1 false) +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi float [[TMP23]] to i32 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: store i32 [[CONV2]], ptr [[ARRAYIDX3]], align 4 +// 
CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP12]], i64 0, i64 1 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR1]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr 
[[TMP27]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP31]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP29]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP32:%.*]] = load float, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP32]], [[TMP33]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL10:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL10]], 0.000000e+00 +// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float 
[[TMP30]], [[TMP31]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL5:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL5]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL11:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL12:%.*]] = fcmp une float [[CALL11]], 0.000000e+00 +// CHECK1-NEXT: [[CALL6:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL7:%.*]] = fcmp une float [[CALL6]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP36:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL12]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV13:%.*]] = uitofp i1 [[TMP36]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP32:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL7]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV8:%.*]] = uitofp i1 [[TMP32]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV8]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // 
CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP14:%.*]] = fcmp olt float [[TMP39]], [[TMP40]] -// CHECK1-NEXT: br i1 [[CMP14]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = fcmp olt float [[TMP33]], [[TMP34]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP41:%.*]] = load float, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load float, ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP42:%.*]] = load float, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP41]], [[COND_TRUE]] ], [ [[TMP42]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP35]], [[COND_TRUE]] ], [ [[TMP36]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = atomicrmw fadd ptr [[TMP0]], float [[TMP43]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr 
@.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL15:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL15]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = fcmp une float [[CALL17]], 0.000000e+00 -// CHECK1-NEXT: br i1 [[TOBOOL18]], label [[LAND_RHS19:%.*]], label [[LAND_END22:%.*]] -// CHECK1: land.rhs19: -// CHECK1-NEXT: [[CALL20:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL21:%.*]] = fcmp une float [[CALL20]], 0.000000e+00 -// CHECK1-NEXT: br label [[LAND_END22]] -// CHECK1: land.end22: -// CHECK1-NEXT: [[TMP47:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL21]], [[LAND_RHS19]] ] -// CHECK1-NEXT: [[CONV23:%.*]] = uitofp i1 [[TMP47]] to float -// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]], float noundef [[CONV23]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP16]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP50:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr 
[[TMP3]] monotonic, align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = atomicrmw fadd ptr [[TMP2]], float [[TMP37]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL10]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = fcmp une float [[CALL12]], 0.000000e+00 +// CHECK1-NEXT: br i1 [[TOBOOL13]], label [[LAND_RHS14:%.*]], label [[LAND_END17:%.*]] +// CHECK1: land.rhs14: +// CHECK1-NEXT: [[CALL15:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL16:%.*]] = fcmp une float [[CALL15]], 0.000000e+00 +// CHECK1-NEXT: br label [[LAND_END17]] +// CHECK1: land.end17: +// CHECK1-NEXT: [[TMP39:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL16]], [[LAND_RHS14]] ] +// CHECK1-NEXT: [[CONV18:%.*]] = uitofp i1 [[TMP39]] to float +// CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]], float noundef [[CONV18]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP11]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) 
#[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i32, ptr [[TMP8]] monotonic, align 4 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP52:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END22]] ], [ [[TMP62:%.*]], [[COND_END27:%.*]] ] -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i32 [[TMP52]] to float -// CHECK1-NEXT: store float [[TMP54]], ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP24:%.*]] = fcmp olt float [[TMP55]], [[TMP56]] -// CHECK1-NEXT: br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE26:%.*]] -// CHECK1: cond.true25: -// CHECK1-NEXT: [[TMP57:%.*]] = load float, ptr [[TMP]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.false26: -// CHECK1-NEXT: [[TMP58:%.*]] = load float, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: br label [[COND_END27]] -// CHECK1: cond.end27: -// CHECK1-NEXT: [[COND28:%.*]] = phi float [ [[TMP57]], [[COND_TRUE25]] ], [ [[TMP58]], [[COND_FALSE26]] ] -// CHECK1-NEXT: store float [[COND28]], ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = cmpxchg ptr [[TMP3]], i32 [[TMP52]], i32 [[TMP59]] monotonic monotonic, align 4 -// CHECK1-NEXT: [[TMP62]] = extractvalue { i32, i1 } [[TMP61]], 0 -// CHECK1-NEXT: [[TMP63:%.*]] = extractvalue { i32, i1 } [[TMP61]], 1 -// CHECK1-NEXT: br i1 [[TMP63]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP41:%.*]] = phi i32 [ [[ATOMIC_LOAD]], [[LAND_END17]] ], [ [[TMP49:%.*]], [[COND_END22:%.*]] ] +// CHECK1-NEXT: [[TMP42:%.*]] = bitcast i32 [[TMP41]] to float +// CHECK1-NEXT: store float [[TMP42]], ptr 
[[TMP]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP19:%.*]] = fcmp olt float [[TMP43]], [[TMP44]] +// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// CHECK1: cond.true20: +// CHECK1-NEXT: [[TMP45:%.*]] = load float, ptr [[TMP]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.false21: +// CHECK1-NEXT: [[TMP46:%.*]] = load float, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.end22: +// CHECK1-NEXT: [[COND23:%.*]] = phi float [ [[TMP45]], [[COND_TRUE20]] ], [ [[TMP46]], [[COND_FALSE21]] ] +// CHECK1-NEXT: store float [[COND23]], ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 +// CHECK1-NEXT: [[TMP48:%.*]] = cmpxchg ptr [[TMP8]], i32 [[TMP41]], i32 [[TMP47]] monotonic monotonic, align 4 +// CHECK1-NEXT: [[TMP49]] = extractvalue { i32, i1 } [[TMP48]], 0 +// CHECK1-NEXT: [[TMP50:%.*]] = extractvalue { i32, i1 } [[TMP48]], 1 +// CHECK1-NEXT: br i1 [[TMP50]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP14]]) // CHECK1-NEXT: ret void // // @@ -449,55 +459,55 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store float [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store float [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIfEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr 
noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = fcmp une float [[CALL2]], 0.000000e+00 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef float @_ZN1SIfEcvfEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = fcmp une float [[CALL3]], 0.000000e+00 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP34]] to float +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = uitofp i1 [[TMP22]] to float // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], float noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP22]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp olt float [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp 
olt float [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP19]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load float, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP17]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// CHECK1-NEXT: store float [[COND]], ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi float [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// CHECK1-NEXT: store float [[COND]], ptr [[TMP19]], align 4 // CHECK1-NEXT: ret void // // @@ -543,6 +553,7 @@ // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -552,23 +563,35 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, ptr [[T_VAR]], ptr [[VAR]], ptr [[VAR1]], ptr [[T_VAR1]], ptr [[VEC]], ptr [[S_ARR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -635,165 +658,162 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR1:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VAR3:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[VAR14:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[T_VAR15:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[VAR1:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S_0]], align 4 -// CHECK1-NEXT: [[REF_TMP13:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[REF_TMP8:%.*]] = alloca [[STRUCT_S_0]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR1]], ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[T_VAR1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 1 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: store i32 2147483647, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP15]], i32 1 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP14]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] -// CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE6:%.*]] +// 
CHECK1-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP10]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] -// CHECK1: .omp.sections.case6: -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR3]], i64 4, i1 false) +// CHECK1: .omp.sections.case1: +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP12]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VAR3]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: store ptr [[VAR14]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 -// CHECK1-NEXT: store ptr [[T_VAR15]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP7]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: store ptr [[VAR1]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 3 +// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP14]], i32 4, i64 32, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP28]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL8:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL8]], 0 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL3]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: [[CALL4:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL5:%.*]] = icmp ne i32 [[CALL4]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP33:%.*]] = 
phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL10]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP33]] to i32 +// CHECK1-NEXT: [[TMP31:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE1]] ], [ [[TOBOOL5]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP31]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR15]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR1]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP38]], [[COND_TRUE]] ], [ [[TMP39]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP7]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP34]], [[COND_TRUE]] ], [ [[TMP35]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP14]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP40]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL12:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP1]], ptr align 4 [[CALL12]], i64 4, i1 false) -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[CALL14:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]]) -// CHECK1-NEXT: [[TOBOOL15:%.*]] = icmp ne i32 [[CALL14]], 0 -// CHECK1-NEXT: br i1 [[TOBOOL15]], label [[LAND_RHS16:%.*]], label [[LAND_END19:%.*]] -// CHECK1: land.rhs16: -// CHECK1-NEXT: [[CALL17:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) -// CHECK1-NEXT: [[TOBOOL18:%.*]] = icmp ne i32 [[CALL17]], 0 -// CHECK1-NEXT: br label [[LAND_END19]] -// CHECK1: land.end19: -// CHECK1-NEXT: [[TMP44:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL18]], [[LAND_RHS16]] ] -// CHECK1-NEXT: [[CONV20:%.*]] = zext i1 [[TMP44]] to i32 -// 
CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]], i32 noundef [[CONV20]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[REF_TMP13]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP13]]) #[[ATTR4]] -// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP7]], ptr @.gomp_critical_user_.atomic_reduction.var) -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[T_VAR15]], align 4 -// CHECK1-NEXT: [[TMP48:%.*]] = atomicrmw min ptr [[TMP3]], i32 [[TMP47]] monotonic, align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP36]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL7:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[CALL7]], i64 4, i1 false) +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]]) +// CHECK1-NEXT: [[TOBOOL10:%.*]] = icmp ne i32 [[CALL9]], 0 +// CHECK1-NEXT: br i1 [[TOBOOL10]], label [[LAND_RHS11:%.*]], label [[LAND_END14:%.*]] +// CHECK1: land.rhs11: +// CHECK1-NEXT: [[CALL12:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) +// CHECK1-NEXT: [[TOBOOL13:%.*]] = icmp ne i32 [[CALL12]], 0 +// CHECK1-NEXT: 
br label [[LAND_END14]] +// CHECK1: land.end14: +// CHECK1-NEXT: [[TMP38:%.*]] = phi i1 [ false, [[DOTOMP_REDUCTION_CASE2]] ], [ [[TOBOOL13]], [[LAND_RHS11]] ] +// CHECK1-NEXT: [[CONV15:%.*]] = zext i1 [[TMP38]] to i32 +// CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]], i32 noundef [[CONV15]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[REF_TMP8]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP8]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB3]], i32 [[TMP14]], ptr @.gomp_critical_user_.atomic_reduction.var) +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = atomicrmw min ptr [[TMP8]], i32 [[TMP39]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR4]] -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR1]]) #[[ATTR4]] +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: ret void // // @@ -806,55 +826,55 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP3]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[TMP4]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[TMP2]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP16]], ptr align 4 [[CALL]], i64 4, i1 false) -// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP22]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP11]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP9]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[CALL]], i64 4, i1 false) +// CHECK1-NEXT: [[CALL2:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP15]]) // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[CALL2]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[LAND_RHS:%.*]], label [[LAND_END:%.*]] // CHECK1: land.rhs: -// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP19]]) +// CHECK1-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZN1SIiEcviEv(ptr noundef nonnull align 4 dereferenceable(4) [[TMP13]]) // CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 
[[CALL3]], 0 // CHECK1-NEXT: br label [[LAND_END]] // CHECK1: land.end: -// CHECK1-NEXT: [[TMP34:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP34]] to i32 +// CHECK1-NEXT: [[TMP22:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[TOBOOL4]], [[LAND_RHS]] ] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TMP22]] to i32 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]], i32 noundef [[CONV]]) -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP22]], ptr align 4 [[REF_TMP]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[REF_TMP]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP19]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE]] ], [ [[TMP40]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], ptr [[TMP28]], align 4 +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: store i32 [[COND]], ptr [[TMP19]], align 4 // CHECK1-NEXT: ret void // // @@ -939,10 +959,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -953,29 +974,31 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 // CHECK3-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK3-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: switch i32 [[TMP8]], label 
[[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK3-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK3-NEXT: ] @@ -983,40 +1006,40 @@ // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.case1: -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK3: .omp.sections.exit: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// 
CHECK3-NEXT: switch i32 [[TMP14]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP13]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr @g, align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = load double, ptr @g, align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP14]], [[TMP15]] // CHECK3-NEXT: store double [[ADD]], ptr @g, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP17:%.*]] = load double, ptr [[G]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = atomicrmw fadd ptr @g, double [[TMP17]] monotonic, align 8 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw fadd ptr @g, double [[TMP16]] monotonic, align 8 +// 
CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -1028,15 +1051,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store double [[ADD]], ptr [[TMP10]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store double [[ADD]], ptr [[TMP7]], align 8 // CHECK3-NEXT: ret void // // @@ -1055,17 +1078,19 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: 
[[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 @@ -1076,29 +1101,31 @@ // CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK4-NEXT: store i32 0, ptr 
[[DOTOMP_SECTIONS_IL_]], align 4 // CHECK4-NEXT: store double 0.000000e+00, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 1 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1 +// CHECK4-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 1 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: switch i32 [[TMP8]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: switch i32 [[TMP9]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK4-NEXT: i32 1, label [[DOTOMP_SECTIONS_CASE1:%.*]] // CHECK4-NEXT: ] @@ -1117,42 +1144,42 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP9:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP10]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK4-NEXT: call void [[TMP13]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr noundef [[BLOCK]]) // CHECK4-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK4: .omp.sections.exit: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK4-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK4-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK4-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK4-NEXT: store ptr [[G]], ptr [[TMP14]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK4-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK4-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK4-NEXT: ] // CHECK4: .omp.reduction.case1: -// CHECK4-NEXT: [[TMP20:%.*]] = load double, ptr @g, align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP20]], [[TMP21]] +// CHECK4-NEXT: [[TMP16:%.*]] = load double, ptr @g, 
align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP16]], [[TMP17]] // CHECK4-NEXT: store double [[ADD]], ptr @g, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.case2: -// CHECK4-NEXT: [[TMP22:%.*]] = load double, ptr [[G]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = atomicrmw fadd ptr @g, double [[TMP22]] monotonic, align 8 -// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP1]], ptr @.gomp_critical_user_.reduction.var) +// CHECK4-NEXT: [[TMP18:%.*]] = load double, ptr [[G]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = atomicrmw fadd ptr @g, double [[TMP18]] monotonic, align 8 +// CHECK4-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK4: .omp.reduction.default: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // @@ -1176,14 +1203,14 @@ // CHECK4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store double [[ADD]], ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = fadd double [[TMP8]], [[TMP9]] +// CHECK4-NEXT: store double [[ADD]], ptr [[TMP7]], align 8 // CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -41,275 +41,281 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[ARGV_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGV_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret i32 0 // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_SECTIONS_LB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_UB_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_ST_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IL_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_SECTIONS_IV_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x 
ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_LB_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_UB_]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_SECTIONS_ST_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_SECTIONS_IL_]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = sext i32 [[TMP4]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP5]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP6]], i64 9 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = sub i64 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = sdiv exact i64 [[TMP10]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP12:%.*]] = add nuw i64 [[TMP11]], 1 -// CHECK1-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP12]], align 16 -// CHECK1-NEXT: store i64 [[TMP12]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP15]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 0 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 9 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP11:%.*]] = 
ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP13:%.*]] = sub i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = sdiv exact i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP15:%.*]] = add nuw i64 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP15]], align 16 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = sub i64 [[TMP18]], 
[[TMP19]] -// CHECK1-NEXT: [[TMP21:%.*]] = sdiv exact i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP21]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP22]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP23:%.*]] = sub i64 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP24:%.*]] = sdiv exact i64 [[TMP23]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP24]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store 
ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr 
[[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP35]], i64 0 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = sext i32 [[TMP36]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP37]] -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP37]] +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP38]], i64 9 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP43:%.*]] = sub i64 [[TMP41]], [[TMP42]] // CHECK1-NEXT: [[TMP44:%.*]] = sdiv exact i64 [[TMP43]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP45:%.*]] = add nuw i64 [[TMP44]], 1 // CHECK1-NEXT: [[TMP46:%.*]] = mul nuw i64 [[TMP45]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP46]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP51]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP53:%.*]] = load i32, ptr [[TMP52]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP55]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[TMP56]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP57]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = icmp slt i32 [[TMP58]], 0 -// CHECK1-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 0 -// CHECK1-NEXT: store i32 [[TMP60]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 -// CHECK1-NEXT: store i32 [[TMP61]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP53]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_SECTIONS_IL_]], ptr [[DOTOMP_SECTIONS_LB_]], ptr [[DOTOMP_SECTIONS_UB_]], ptr [[DOTOMP_SECTIONS_ST_]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP57:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP58:%.*]] = icmp slt i32 [[TMP57]], 0 +// CHECK1-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], i32 [[TMP57]], i32 0 +// CHECK1-NEXT: store i32 [[TMP59]], ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_LB_]], align 4 +// CHECK1-NEXT: store i32 [[TMP60]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP62]], [[TMP63]] +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_UB_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP61]], [[TMP62]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: switch i32 [[TMP64]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: switch i32 [[TMP63]], label [[DOTOMP_SECTIONS_EXIT:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTOMP_SECTIONS_CASE:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.sections.case: -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr 
[[DOTTASK_RED_]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP81]], ptr [[TMP71]]) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, ptr [[TMP68]], align 4 +// CHECK1-NEXT: [[TMP70:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP69]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP70]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP70]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP76]], ptr [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i32, ptr [[TMP77]], align 4 +// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP78]], ptr [[TMP70]]) // CHECK1-NEXT: br label [[DOTOMP_SECTIONS_EXIT]] // CHECK1: .omp.sections.exit: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 
[[TMP83]], 1 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[DOTOMP_SECTIONS_IV_]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP80]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTOMP_SECTIONS_IV_]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP12]] to ptr -// CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 -// CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP82]]) +// CHECK1-NEXT: [[TMP83:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP83]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP84]], i32 1) +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP88:%.*]] = inttoptr i64 [[TMP15]] to ptr +// CHECK1-NEXT: store ptr [[TMP88]], ptr [[TMP87]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP89]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB3:[0-9]+]], i32 [[TMP90]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP91]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP93:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP92]], [[TMP93]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP94:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP95]] to i32 +// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 
1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP96]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP94]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP90]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP12]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP97]] monotonic, align 4 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP15]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP99]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: 
[[CONV26:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP101:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] ], [ [[TMP106:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP101]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP102]] to i32 +// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP103]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP101]], i8 [[TMP104]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP106]] = extractvalue { i8, i1 } [[TMP105]], 0 +// CHECK1-NEXT: [[TMP107:%.*]] = extractvalue { i8, i1 } [[TMP105]], 1 +// CHECK1-NEXT: br i1 [[TMP107]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP99]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB3]], i32 [[TMP90]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]]) -// CHECK1-NEXT: [[TMP114:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP115:%.*]] = load i32, ptr [[TMP114]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP115]]) +// CHECK1-NEXT: [[TMP108:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP108]]) +// CHECK1-NEXT: [[TMP109:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = load i32, ptr [[TMP109]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP110]]) // CHECK1-NEXT: ret void // // @@ -320,8 +326,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: 
store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -332,12 +338,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -432,59 +438,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: 
[[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 
[[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 
9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -496,38 +502,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr 
[[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: 
[[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -488,28 +488,30 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK1-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -517,42 +519,46 @@ // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK1-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP15]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -597,7 +603,7 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -607,54 +613,60 @@ // CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK1-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK1-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP6:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP14]]) // CHECK1-NEXT: ret void // // @@ -684,9 +696,7 @@ // CHECK1-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -711,90 +721,100 @@ // CHECK1-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr 
[[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK1-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// 
CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP26]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -802,44 +822,44 @@ // CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], ptr [[THIS1]], i32 0, 
i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 // CHECK1-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// 
CHECK1-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load 
i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // @@ -874,14 +894,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -890,49 +910,57 @@ // CHECK1-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 
@__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK1-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK1-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 
+// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK1-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr 
[[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP25]]) // CHECK1-NEXT: ret void // // @@ -969,36 +997,40 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK1-SAME: () #[[ATTR10]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK1-NEXT: unreachable // // @@ -1370,36 +1402,40 @@ // CHECK2-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK2-SAME: () #[[ATTR10]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: invoke void @_Z3foov() // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // 
CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1431,9 +1467,7 @@ // CHECK2-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -1458,30 +1492,32 @@ // CHECK2-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// 
CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1491,57 +1527,65 @@ // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// 
CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 
[[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// 
CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK2-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.6, i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.6, i32 [[TMP26]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1549,9 +1593,7 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -1571,22 +1613,24 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK2-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..7, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// 
CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // @@ -1621,14 +1665,14 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -1637,49 +1681,57 @@ // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 
8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 
+// CHECK2-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK2-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: store i32 [[INC]], 
ptr [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK2-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK2-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK2-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.8, i32 [[TMP17]]) +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK2-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// 
CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.8, i32 [[TMP25]]) // CHECK2-NEXT: ret void // // @@ -1718,71 +1770,77 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..9, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK2-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK2-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 
[[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK2-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.10, i32 [[TMP11]]) +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.10, i32 [[TMP15]]) // CHECK2-NEXT: ret void // CHECK2: terminate.lpad: -// CHECK2-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK2-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK2-NEXT: catch ptr null -// CHECK2-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK2-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK2-NEXT: unreachable // // @@ -1790,15 +1848,15 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP3]], align 8 // CHECK2-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) @@ -1827,64 +1885,70 @@ // CHECK2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK2-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK2-NEXT: 
[[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK2-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = 
alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK2-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 
@__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK2-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK2-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK2-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.12, i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK2-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK2-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr 
@.omp.copyprivate.copy_func.12, i32 [[TMP14]]) // CHECK2-NEXT: ret void // // @@ -2262,36 +2326,40 @@ // CHECK4-LABEL: define {{[^@]+}}@_Z15parallel_singlev // CHECK4-SAME: () #[[ATTR10]] { // CHECK4-NEXT: entry: -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: invoke void @_Z3foov() // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2323,9 +2391,7 @@ // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -2350,30 +2416,32 @@ // CHECK4-NEXT: [[C8:%.*]] = 
getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]) +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2383,57 +2451,65 @@ // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 
8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 
-// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP18]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 // CHECK4-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP18]]) +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// 
CHECK4-NEXT: store ptr [[B]], ptr [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP26]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2441,9 +2517,7 @@ // CHECK4-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -2463,22 +2537,24 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4 -// CHECK4-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]) +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[THIS1]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // @@ -2513,14 +2589,14 @@ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -2529,49 +2605,57 @@ // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[C_ADDR]], ptr 
[[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[C]], align 4 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 -// CHECK4-NEXT: br i1 [[TMP6]], 
label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK4-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK4-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK4-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4 +// CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1 +// CHECK4-NEXT: store i32 [[DEC]], ptr [[B]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1 +// CHECK4-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK4-NEXT: store i32 1, ptr 
[[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK4-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 -// CHECK4-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK4-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP17]]) +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK4-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1 +// CHECK4-NEXT: store ptr [[B]], ptr [[TMP22]], align 8 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP25]]) // CHECK4-NEXT: ret void // // @@ -2610,71 +2694,77 
@@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store double 0.000000e+00, ptr [[A]], align 8 // CHECK4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[A3]], ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK4-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP2]]) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8 +// CHECK4-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_4:%.*]], align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP10]], 
align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 // CHECK4-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK4: invoke.cont: -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP11]]) +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP15]]) // CHECK4-NEXT: ret void // CHECK4: terminate.lpad: -// CHECK4-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK4-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK4-NEXT: catch ptr null -// CHECK4-NEXT: [[TMP13:%.*]] = extractvalue { ptr, 
i32 } [[TMP12]], 0 -// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]] +// CHECK4-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK4-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]] // CHECK4-NEXT: unreachable // // @@ -2682,15 +2772,15 @@ // CHECK4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK4-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_5:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_4:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], ptr [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK4-NEXT: store ptr [[TMP5]], ptr [[TMP3]], align 8 // CHECK4-NEXT: call void @_ZZZN3SSTIdEC1EvENKUlvE_clEvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) @@ -2719,64 +2809,70 @@ // CHECK4-SAME: (ptr 
noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_5:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 // CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 // CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 // CHECK4-NEXT: store double [[INC]], ptr [[TMP3]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK4-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP8]]) +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], ptr [[THIS1]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8 +// CHECK4-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK4-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK4-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK4-NEXT: store double [[TMP4]], ptr [[A]], align 8 +// CHECK4-NEXT: store ptr [[A]], ptr [[TMP]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP6:%.*]] = 
load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8 -// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00 -// CHECK4-NEXT: store double [[INC]], ptr [[TMP6]], align 8 -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8 +// CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK4-NEXT: store double [[INC]], ptr [[TMP10]], align 8 +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK4-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 -// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP10]]) +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK4-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4 +// CHECK4-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB1]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP14]]) // CHECK4-NEXT: ret void // // @@ -3186,28 +3282,30 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[A2:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = 
alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SST:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG83:![0-9]+]] // CHECK5-NEXT: store double 0.000000e+00, ptr [[A]], align 8, !dbg [[DBG83]] // CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SST]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG84:![0-9]+]] // CHECK5-NEXT: store ptr [[A3]], ptr [[A2]], align 8, !dbg [[DBG84]] -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG85:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !dbg [[DBG86:![0-9]+]] -// CHECK5-NEXT: store double [[TMP1]], ptr [[A_CASTED]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG86]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[THIS1]], i64 [[TMP2]]), !dbg [[DBG86]] +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG85:![0-9]+]] +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG85]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !dbg [[DBG86]] +// CHECK5-NEXT: store double [[TMP3]], ptr [[TMP1]], align 8, !dbg [[DBG85]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG85]] // CHECK5-NEXT: ret void, !dbg [[DBG87:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG88:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG88:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 @@ -3215,42 +3313,46 @@ // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG89]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG89:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr 
[[TMP0]], i32 0, i32 0, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store double [[TMP4]], ptr [[A]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG89]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8, !dbg [[DBG91:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB16:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG91]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG91]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB16:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG91]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG91]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG91]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] -// CHECK5-NEXT: 
[[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG92:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG92]] // CHECK5-NEXT: invoke void @_ZZN3SSTIdEC1EvENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG92]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB16]], i32 [[TMP3]]), !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB16]], i32 [[TMP7]]), !dbg [[DBG92]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG92]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB16]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP11]]), !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[_TMP1]], align 8, !dbg [[DBG94:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG92]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB16]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.5, i32 [[TMP15]]), !dbg [[DBG92]] // CHECK5-NEXT: ret void, !dbg [[DBG95:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP12:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP13:%.*]] = extractvalue { ptr, i32 } [[TMP12]], 0, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP13]]) #[[ATTR13]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0, !dbg [[DBG92]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR13]], !dbg [[DBG92]] // CHECK5-NEXT: unreachable, !dbg [[DBG92]] // // @@ -3295,7 +3397,7 @@ // CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG105:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], ptr [[THIS1]], i32 0, i32 0 @@ -3305,54 +3407,60 @@ // CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG107:![0-9]+]] // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00, !dbg [[DBG107]] // CHECK5-NEXT: store double [[INC]], ptr [[TMP3]], align 8, !dbg [[DBG107]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG108:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: store double [[TMP7]], ptr [[A_CASTED]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG109]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB22:[0-9]+]], i32 2, ptr @.omp_outlined..6, ptr [[TMP1]], i64 [[TMP8]]), !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG108:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG109:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP8]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: store double [[TMP9]], ptr [[TMP6]], align 8, !dbg [[DBG108]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB22:[0-9]+]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG108]] // CHECK5-NEXT: ret void, !dbg [[DBG110:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG111:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG112]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store double [[TMP4]], ptr [[A]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG112]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG113:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP5]], ptr [[_TMP1]], align 8, !dbg [[DBG114:![0-9]+]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB20:[0-9]+]], i32 [[TMP3]]), !dbg [[DBG114]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0, !dbg [[DBG114]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB20:[0-9]+]], i32 [[TMP7]]), !dbg [[DBG114]] +// CHECK5-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0, !dbg [[DBG114]] +// CHECK5-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG114]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG113]] -// CHECK5-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP6]], align 8, !dbg [[DBG115:![0-9]+]] -// 
CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP7]], 1.000000e+00, !dbg [[DBG115]] -// CHECK5-NEXT: store double [[INC]], ptr [[TMP6]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB20]], i32 [[TMP3]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG113]] +// CHECK5-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP10]], align 8, !dbg [[DBG115:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00, !dbg [[DBG115]] +// CHECK5-NEXT: store double [[INC]], ptr [[TMP10]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB20]], i32 [[TMP7]]), !dbg [[DBG115]] // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] // CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG115]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8, !dbg [[DBG115]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB20]], i32 [[TMP3]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.7, i32 [[TMP10]]), !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG116:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG115]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG115]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB20]], i32 [[TMP7]], i64 8, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr 
@.omp.copyprivate.copy_func.7, i32 [[TMP14]]), !dbg [[DBG115]] // CHECK5-NEXT: ret void, !dbg [[DBG117:![0-9]+]] // // @@ -3382,9 +3490,7 @@ // CHECK5-NEXT: [[A2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[B4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[C7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 @@ -3409,177 +3515,187 @@ // CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG128:![0-9]+]] // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C8]], align 8, !dbg [[DBG128]] // CHECK5-NEXT: store ptr [[TMP1]], ptr [[C7]], align 8, !dbg [[DBG128]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG129:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG130:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[B4]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[C7]], align 8, !dbg [[DBG131:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C_CASTED]], align 4, !dbg [[DBG130]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[C_CASTED]], align 8, !dbg [[DBG130]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB26:[0-9]+]], i32 4, ptr @.omp_outlined..8, ptr [[THIS1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP9]]), !dbg [[DBG130]] -// CHECK5-NEXT: ret void, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +// CHECK5-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A2]], align 8, !dbg [[DBG130:![0-9]+]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG130]] +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP3]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[B4]], align 4, !dbg [[DBG131:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG129]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[C7]], align 8, !dbg [[DBG132:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG132]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP8]], align 8, !dbg [[DBG129]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB26:[0-9]+]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG129]] +// CHECK5-NEXT: ret void, !dbg [[DBG133:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG133:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG134:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_DID_IT:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_3:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8, !dbg [[DBG134]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG135:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8, !dbg [[DBG136:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB24:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG136]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG136]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG136]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg 
[[DBG135]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[B]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C]], align 4, !dbg [[DBG135]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8, !dbg [[DBG135]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG136:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8, !dbg [[DBG137:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG138:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB24:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG137]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG137]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG137]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG138:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[_TMP2]], align 8, !dbg [[DBG139:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2, !dbg [[DBG138]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP10]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG139]] -// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 0, !dbg [[DBG139:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 1, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG140:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 2, !dbg [[DBG139]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP18]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[REF_TMP]], i32 0, i32 3, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG140]] +// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG139]] // CHECK5-NEXT: invoke void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG138]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG139]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB24]], i32 [[TMP4]]), !dbg 
[[DBG138]] -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG138]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB24]], i32 [[TMP12]]), !dbg [[DBG139]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG139]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG140:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP15]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG141:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB24]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP18]]), !dbg [[DBG138]] -// CHECK5-NEXT: ret void, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 
0, i64 1, !dbg [[DBG139]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP23]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG142:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB24]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.9, i32 [[TMP26]]), !dbg [[DBG139]] +// CHECK5-NEXT: ret void, !dbg [[DBG143:![0-9]+]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP19:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG138]] -// CHECK5-NEXT: [[TMP20:%.*]] = extractvalue { ptr, i32 } [[TMP19]], 0, !dbg [[DBG138]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP20]]) #[[ATTR13]], !dbg [[DBG138]] -// CHECK5-NEXT: unreachable, !dbg [[DBG138]] +// CHECK5-NEXT: [[TMP27:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG139]] +// CHECK5-NEXT: [[TMP28:%.*]] = extractvalue { ptr, i32 } [[TMP27]], 0, !dbg [[DBG139]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP28]]) #[[ATTR13]], !dbg [[DBG139]] +// CHECK5-NEXT: unreachable, !dbg [[DBG139]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZZN2SSC1ERiENKUlvE_clEv -// CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG143:![0-9]+]] { +// CHECK5-SAME: (ptr noundef nonnull align 8 dereferenceable(32) [[THIS:%.*]]) #[[ATTR10]] align 2 !dbg [[DBG144:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_CASTED:%.*]] = alloca 
i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_3:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG144:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG144]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG145:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG145]] -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4, !dbg [[DBG145]] -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG146:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG146]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG147:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG147]] -// CHECK5-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4, !dbg [[DBG147]] -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG148:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG148]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG149]] -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4, !dbg [[DBG149]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG150:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load 
ptr, ptr [[TMP11]], align 8, !dbg [[DBG150]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG151:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG152:![0-9]+]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8, !dbg [[DBG152]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP17]], ptr [[B_CASTED]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG153:![0-9]+]] -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8, !dbg [[DBG153]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: store i32 [[TMP21]], ptr [[C_CASTED]], align 4, !dbg [[DBG151]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[C_CASTED]], align 8, !dbg [[DBG151]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB30:[0-9]+]], i32 4, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP14]], i64 [[TMP18]], i64 [[TMP22]]), !dbg [[DBG151]] -// CHECK5-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1, !dbg [[DBG145:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !dbg [[DBG145]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG146:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG146]] +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4, !dbg [[DBG146]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG147:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG147]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4, !dbg [[DBG148:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1, !dbg [[DBG148]] +// CHECK5-NEXT: store i32 [[DEC]], ptr [[TMP6]], align 4, !dbg [[DBG148]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG149:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8, !dbg [[DBG149]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 1, !dbg [[DBG150]] +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG151:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP11]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 1, !dbg 
[[DBG152:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8, !dbg [[DBG152]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg [[DBG152]] +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 2, !dbg [[DBG153:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8, !dbg [[DBG153]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !dbg [[DBG153]] +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP16]], align 4, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG151]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], ptr [[THIS1]], i32 0, i32 3, !dbg [[DBG154:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8, !dbg [[DBG154]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4, !dbg [[DBG154]] +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP20]], align 8, !dbg [[DBG151]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB30:[0-9]+]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG151]] +// CHECK5-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.9 -// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG155:![0-9]+]] { +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG156:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG157:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG157]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG158:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG158]] -// 
CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG156]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG159:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG159]] -// CHECK5-NEXT: ret void, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG157:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG158]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG159]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG157]] +// 
CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG157]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG160:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG160]] +// CHECK5-NEXT: ret void, !dbg [[DBG160]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[C:%.*]]) #[[ATTR12]] !dbg [[DBG160:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] !dbg [[DBG161:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 @@ -3588,125 +3704,137 @@ // CHECK5-NEXT: [[DOTOMP_COPYPRIVATE_CPR_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] -// CHECK5-NEXT: store ptr [[A_ADDR]], ptr [[TMP]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: store ptr [[C_ADDR]], ptr [[_TMP1]], align 8, !dbg [[DBG161]] -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG162:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP1]], ptr [[_TMP2]], align 8, !dbg [[DBG163:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG164:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[_TMP3]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB28:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG163]] -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0, !dbg [[DBG163]] -// CHECK5-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG163]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG162:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg 
[[DBG162]] +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[B]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[C]], align 4, !dbg [[DBG162]] +// CHECK5-NEXT: store ptr [[A]], ptr [[TMP]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8, !dbg [[DBG162]] +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG163:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP9]], ptr [[_TMP2]], align 8, !dbg [[DBG164:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB28:[0-9]+]], i32 [[TMP12]]), !dbg [[DBG164]] +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0, !dbg [[DBG164]] +// CHECK5-NEXT: br i1 [[TMP14]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG164]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG162]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG165:![0-9]+]] -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG165]] -// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP7]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[B_ADDR]], align 4, !dbg [[DBG166:![0-9]+]] -// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG166]] -// CHECK5-NEXT: store i32 [[DEC]], ptr [[B_ADDR]], align 4, !dbg [[DBG166]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG164]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !dbg [[DBG167:![0-9]+]] -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg [[DBG167]] -// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP10]], align 4, !dbg [[DBG167]] -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB28]], i32 [[TMP4]]), !dbg [[DBG165]] -// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG163]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP16]], 1, !dbg [[DBG166]] +// CHECK5-NEXT: store i32 [[INC]], ptr [[TMP15]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !dbg [[DBG167:![0-9]+]] +// CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP17]], -1, !dbg [[DBG167]] +// CHECK5-NEXT: store i32 [[DEC]], ptr [[B]], align 4, !dbg [[DBG167]] +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG165]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !dbg [[DBG168:![0-9]+]] +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP19]], 1, !dbg [[DBG168]] +// CHECK5-NEXT: store i32 [[DIV]], ptr [[TMP18]], align 4, !dbg [[DBG168]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB28]], i32 [[TMP12]]), !dbg [[DBG166]] +// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG166]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG166]] // CHECK5: omp_if.end: -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG168:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK5-NEXT: store ptr [[B_ADDR]], ptr [[TMP14]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG169:![0-9]+]] -// CHECK5-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG165]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg [[DBG165]] -// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB28]], i32 [[TMP4]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP17]]), !dbg [[DBG165]] -// CHECK5-NEXT: ret void, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 0, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 1, !dbg [[DBG166]] +// CHECK5-NEXT: store ptr [[B]], ptr [[TMP22]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], i64 0, i64 2, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG170:![0-9]+]] +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !dbg [[DBG166]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COPYPRIVATE_DID_IT]], align 4, !dbg 
[[DBG166]] +// CHECK5-NEXT: call void @__kmpc_copyprivate(ptr @[[GLOB28]], i32 [[TMP12]], i64 24, ptr [[DOTOMP_COPYPRIVATE_CPR_LIST]], ptr @.omp.copyprivate.copy_func.11, i32 [[TMP25]]), !dbg [[DBG166]] +// CHECK5-NEXT: ret void, !dbg [[DBG171:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp.copyprivate.copy_func.11 -// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG171:![0-9]+]] { +// CHECK5-SAME: (ptr noundef [[TMP0:%.*]], ptr noundef [[TMP1:%.*]]) #[[ATTR9]] !dbg [[DBG172:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG172:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG173]] -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG174:![0-9]+]] -// 
CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG174]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG172]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG175:![0-9]+]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG175]] -// CHECK5-NEXT: ret void, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG173:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[TMP5]], align 4, !dbg [[DBG174]] +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG175:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[TMP10]], align 4, !dbg [[DBG175]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 2, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8, !dbg [[DBG173]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[TMP15]], align 4, !dbg [[DBG176]] +// CHECK5-NEXT: ret void, !dbg [[DBG176]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z15parallel_singlev -// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG176:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR10]] !dbg [[DBG177:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB35:[0-9]+]], i32 0, ptr @.omp_outlined..12), !dbg [[DBG177:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB35:[0-9]+]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG178:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG179:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG179:![0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR12]] personality ptr @__gxx_personality_v0 !dbg [[DBG180:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG180:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB32:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG180]] -// CHECK5-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG180]] -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG180]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG182:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB32:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG182]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp ne 
i32 [[TMP3]], 0, !dbg [[DBG182]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]], !dbg [[DBG182]] // CHECK5: omp_if.then: // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG181:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG183:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB32]], i32 [[TMP1]]), !dbg [[DBG181]] -// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG181]] +// CHECK5-NEXT: call void @__kmpc_end_single(ptr @[[GLOB32]], i32 [[TMP2]]), !dbg [[DBG183]] +// CHECK5-NEXT: br label [[OMP_IF_END]], !dbg [[DBG183]] // CHECK5: omp_if.end: -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB33:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG182:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG182]] +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB33:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG184:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG184]] // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK5-NEXT: catch ptr null, !dbg [[DBG181]] -// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG181]] -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR13]], !dbg [[DBG181]] -// CHECK5-NEXT: unreachable, !dbg [[DBG181]] +// CHECK5-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: catch ptr null, !dbg [[DBG183]] +// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG183]] +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR13]], !dbg [[DBG183]] +// CHECK5-NEXT: unreachable, !dbg [[DBG183]] // // // CHECK5-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_single_codegen.cpp -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG183:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section 
"__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG185:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG184:![0-9]+]] -// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG184]] -// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG184]] +// CHECK5-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG186:![0-9]+]] +// CHECK5-NEXT: call void @__cxx_global_var_init.4(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_.(), !dbg [[DBG186]] +// CHECK5-NEXT: call void @.__omp_threadprivate_init_..3(), !dbg [[DBG186]] // CHECK5-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_firstprivate_codegen.cpp b/clang/test/OpenMP/single_firstprivate_codegen.cpp --- a/clang/test/OpenMP/single_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/single_firstprivate_codegen.cpp @@ -352,6 +352,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -360,22 +361,30 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined., ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2StC2Ev @@ -444,77 +453,76 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = 
alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 -// CHECK1-NEXT: br i1 [[TMP7]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP1]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP4]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[OMP_IF_THEN]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr 
align 4 [[ARRAYIDX7]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -732,39 +740,41 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr 
@[[GLOB1:[0-9]+]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK3-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[G]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK3-NEXT: store i32 17, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: store i32 17, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP10]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // 
CHECK3-NEXT: ret void // // @@ -920,37 +930,42 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr @_ZZ4mainE5sivar) +// CHECK4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 
@__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK4-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP5:%.*]] = load volatile i32, ptr @g, align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[G]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, ptr @g, align 4 +// CHECK4-NEXT: store i32 [[TMP7]], ptr [[G]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[SIVAR]], align 4 // CHECK4-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK4-NEXT: store i32 37, ptr [[SIVAR1]], align 4 +// CHECK4-NEXT: store i32 37, ptr [[SIVAR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 8 // CHECK4-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 1 @@ -962,18 +977,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP7:%.*]] = load volatile i32, ptr [[G]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP7]], ptr [[BLOCK_CAPTURED]], align 8 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], ptr [[BLOCK_CAPTURED2]], align 4 -// 
CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK4-NEXT: call void [[TMP10]](ptr [[BLOCK]]) -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP9:%.*]] = load volatile i32, ptr [[G]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP9]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, i32 }>, ptr [[BLOCK]], i32 0, i32 6 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK4-NEXT: call void [[TMP12]](ptr [[BLOCK]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/single_private_codegen.cpp b/clang/test/OpenMP/single_private_codegen.cpp --- a/clang/test/OpenMP/single_private_codegen.cpp +++ b/clang/test/OpenMP/single_private_codegen.cpp @@ -100,6 +100,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 @@ 
-109,23 +110,23 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK1-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK1-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label 
[[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -152,10 +153,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -163,11 +165,13 @@ // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 @@ -180,27 +184,27 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: store i32 303, ptr [[SIVAR]], align 4 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ARRAYCTOR_CONT]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -223,6 +227,7 @@ // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -231,22 +236,22 @@ // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], i32 noundef 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done1: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP2]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -307,21 +312,24 @@ 
// // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK1-NEXT: 
[[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 @@ -334,26 +342,26 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq 
ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done3: -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -412,32 +420,35 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 // CHECK3-NEXT: store i32 101, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP4]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP6]], align 8 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) -// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: ret void // // @@ -456,27 +467,31 @@ // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK4-NEXT: store 
ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, double, i32 }>, align 8 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: store double 1.000000e+00, ptr [[G]], align 8 // CHECK4-NEXT: store i32 101, ptr [[SIVAR]], align 4 @@ -491,18 +506,18 @@ // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp.1, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[G]], align 8 -// CHECK4-NEXT: store volatile double [[TMP4]], ptr [[BLOCK_CAPTURED]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load volatile double, ptr [[G]], align 8 +// CHECK4-NEXT: store volatile double [[TMP5]], ptr [[BLOCK_CAPTURED]], align 8 // CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, double, i32 }>, ptr [[BLOCK]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[BLOCK_CAPTURED1]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[BLOCK_CAPTURED1]], align 8 // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK4-NEXT: call void [[TMP9]](ptr noundef [[BLOCK]]) -// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK4-NEXT: call void [[TMP8]](ptr noundef [[BLOCK]]) +// CHECK4-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 
[[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -355,6 +355,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -380,27 +381,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 
0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true8: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined., ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -408,34 +415,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[GD]], ptr 
[[GD_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK1-NEXT: 
[[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -446,46 +453,52 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // 
CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -500,140 +513,142 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store 
ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr @Gb, align 8 -// CHECK1-NEXT: store double [[TMP6]], ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK1-NEXT: store float [[TMP8]], ptr [[SB_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @Gc, align 8 -// CHECK1-NEXT: store double [[TMP10]], ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK1-NEXT: store i16 [[TMP12]], ptr [[C_CASTED]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr [[SC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK1-NEXT: store i16 [[TMP16]], ptr [[D_CASTED]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr @Gd, align 8 -// CHECK1-NEXT: store double [[TMP18]], ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] 
= load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK1-NEXT: store float [[TMP20]], ptr [[SD_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr @Gb, align 8 +// CHECK1-NEXT: store double [[TMP11]], ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[SB_CASTED]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, ptr @Gc, align 8 +// CHECK1-NEXT: store double [[TMP15]], ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP6]], align 
2 +// CHECK1-NEXT: store i16 [[TMP17]], ptr [[C_CASTED]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK1-NEXT: store float [[TMP19]], ptr [[SC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK1-NEXT: store i16 [[TMP21]], ptr [[D_CASTED]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr @Gd, align 8 +// CHECK1-NEXT: store double [[TMP23]], ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK1-NEXT: store float [[TMP25]], ptr [[SD_CASTED]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK1-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true2: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP24]] to double +// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP4]], 
label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], 
i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], 
i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr 
inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr 
inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 9, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 -// CHECK1-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP59]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, ptr [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP69:%.*]] = icmp ne i32 [[TMP68]], 0 +// CHECK1-NEXT: br i1 [[TMP69]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 
[[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -651,6 +666,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -676,27 +692,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// 
CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true8: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -704,34 +726,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[GD]], ptr [[GD_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 
4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -761,46 +783,52 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK1-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK1-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK1-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK1-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr 
@_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK1-NEXT: ret i32 [[ADD13]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GB_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SB_CASTED:%.*]] = alloca i64, align 8 @@ -815,140 +843,142 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr @Gb, align 8 -// CHECK1-NEXT: store double [[TMP6]], ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[GB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK1-NEXT: store float [[TMP8]], ptr [[SB_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[SB_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr @Gc, align 8 -// CHECK1-NEXT: store double [[TMP10]], ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[GC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK1-NEXT: store i16 [[TMP12]], ptr [[C_CASTED]], align 2 -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[C_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK1-NEXT: store float [[TMP14]], ptr [[SC_CASTED]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[SC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK1-NEXT: store i16 [[TMP16]], ptr [[D_CASTED]], align 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[D_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr @Gd, align 8 -// 
CHECK1-NEXT: store double [[TMP18]], ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[GD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK1-NEXT: store float [[TMP20]], ptr [[SD_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[SD_CASTED]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr @Ga, align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP22]], 0.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK1-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr @Gb, align 8 +// CHECK1-NEXT: store double [[TMP11]], ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[GB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK1-NEXT: store float [[TMP13]], ptr [[SB_CASTED]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[SB_CASTED]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load double, ptr @Gc, align 8 +// 
CHECK1-NEXT: store double [[TMP15]], ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[GC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK1-NEXT: store i16 [[TMP17]], ptr [[C_CASTED]], align 2 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[C_CASTED]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK1-NEXT: store float [[TMP19]], ptr [[SC_CASTED]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[SC_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK1-NEXT: store i16 [[TMP21]], ptr [[D_CASTED]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[D_CASTED]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr @Gd, align 8 +// CHECK1-NEXT: store double [[TMP23]], ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[GD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK1-NEXT: store float [[TMP25]], ptr [[SD_CASTED]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[SD_CASTED]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr @Ga, align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP27]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP28]] to i32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK1-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true2: -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP24]] to double +// 
CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP29]] to double // CHECK1-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP7]], ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP9]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP34]], align 8 -// 
CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP44]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP46]], align 8 -// 
CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// 
CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP42]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5 +// 
CHECK1-NEXT: store ptr null, ptr [[TMP47]], align 8 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8 +// CHECK1-NEXT: store ptr null, ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: 
store i32 1, ptr [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 9, ptr [[TMP55]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP53]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP60]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP62]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP64:%.*]] = icmp ne i32 [[TMP63]], 0 -// CHECK1-NEXT: br i1 [[TMP64]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP59]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, ptr [[TMP60]], align 4 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP69:%.*]] = icmp ne i32 [[TMP68]], 0 +// CHECK1-NEXT: br i1 [[TMP69]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP5]], i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: ret void @@ -966,6 +996,7 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -991,27 +1022,33 @@ // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float // CHECK1-NEXT: store float [[CONV5]], ptr [[SB_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[GC_ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 
0.000000e+00 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[D_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[GD_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SD_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[GC_ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP7]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[CONV6]], 0 // CHECK1-NEXT: br i1 [[CMP7]], label [[LAND_LHS_TRUE8:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true8: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP6]] to double +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[CMP10:%.*]] = fcmp ogt double [[CONV9]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD_ADDR]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -1019,34 +1056,34 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[GD]], ptr [[GD_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 
4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK1-NEXT: ret void // // @@ -1237,6 +1274,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1271,27 +1309,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: 
[[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined., ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1299,34 +1343,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1337,46 +1381,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..1, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 @@ -1388,131 +1438,133 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], 
ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 -// CHECK3-NEXT: store float [[TMP6]], ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK3-NEXT: store i16 [[TMP8]], ptr [[C_CASTED]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 -// CHECK3-NEXT: store float [[TMP10]], ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK3-NEXT: store i16 [[TMP12]], ptr [[D_CASTED]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 -// CHECK3-NEXT: store float [[TMP14]], ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ3barssssE2Sb, align 4 +// CHECK3-NEXT: store float [[TMP11]], ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[C_CASTED]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr @_ZZ3barssssE2Sc, align 4 +// CHECK3-NEXT: store float [[TMP15]], ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK3-NEXT: store i16 [[TMP17]], ptr [[D_CASTED]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ3barssssE2Sd, align 4 +// CHECK3-NEXT: store float [[TMP19]], ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr @Ga, align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// 
CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK3-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true2: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr @_ZZ3barssssE2Sa, align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], 
ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP37]], 
align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// 
CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr 
null, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store ptr null, ptr 
[[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 9, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP56]], align 8 -// 
CHECK3-NEXT: [[TMP57:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK3-NEXT: br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP53]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 9, ptr [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP51]], ptr [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP61]], 
align 8 +// CHECK3-NEXT: [[TMP62:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 +// CHECK3-NEXT: br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barssss_l94(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1533,6 +1585,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1567,27 +1620,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = 
fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..2, ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1595,34 +1654,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: 
[[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // @@ -1652,46 +1711,52 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i16 [[A]], ptr [[A_ADDR]], align 2 // CHECK3-NEXT: store i16 [[B]], ptr [[B_ADDR]], align 2 // CHECK3-NEXT: store i16 [[C]], ptr [[C_ADDR]], align 2 // CHECK3-NEXT: store i16 [[D]], ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..5, ptr [[A_ADDR]], ptr [[B_ADDR]], ptr [[C_ADDR]], ptr [[D_ADDR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[B_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[C_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[A_ADDR]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV1]] -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[CONV2]] -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[D_ADDR]], align 2 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[D_ADDR]], align 2 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CONV4]] -// CHECK3-NEXT: [[TMP4:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP4]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV6:%.*]] = fptosi float [[TMP8]] to i32 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[ADD5]], [[CONV6]] -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: [[CONV8:%.*]] = fptosi float [[TMP9]] to i32 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[ADD7]], [[CONV8]] -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK3-NEXT: [[CONV10:%.*]] = fptosi float [[TMP6]] to i32 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: 
[[CONV10:%.*]] = fptosi float [[TMP10]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CONV10]] -// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fptosi float [[TMP11]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD11]], [[CONV12]] // CHECK3-NEXT: ret i32 [[ADD13]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SB_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_CASTED:%.*]] = alloca i32, align 4 @@ -1703,131 +1768,133 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [9 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// 
CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 -// CHECK3-NEXT: store float [[TMP6]], ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[SB_CASTED]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[TMP2]], align 2 -// CHECK3-NEXT: store i16 [[TMP8]], ptr [[C_CASTED]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[C_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 -// CHECK3-NEXT: store float [[TMP10]], ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[SC_CASTED]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP3]], align 2 -// CHECK3-NEXT: store i16 [[TMP12]], ptr [[D_CASTED]], align 2 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[D_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 -// CHECK3-NEXT: store float [[TMP14]], ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SD_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, ptr @Ga, align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP16]], 0.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP4]], align 2 +// CHECK3-NEXT: store i16 [[TMP9]], ptr [[B_CASTED]], align 2 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sb, align 4 +// CHECK3-NEXT: store float [[TMP11]], ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SB_CASTED]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP6]], align 2 +// CHECK3-NEXT: store i16 [[TMP13]], ptr [[C_CASTED]], align 2 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[C_CASTED]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sc, align 4 +// CHECK3-NEXT: store float [[TMP15]], ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SC_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP8]], align 2 +// CHECK3-NEXT: store i16 [[TMP17]], ptr [[D_CASTED]], align 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[D_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sd, align 4 +// CHECK3-NEXT: store float [[TMP19]], ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SD_CASTED]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr @Ga, align 8 +// CHECK3-NEXT: 
[[CMP:%.*]] = fcmp ogt double [[TMP21]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP22]] to i32 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0 // CHECK3-NEXT: br i1 [[CMP1]], label [[LAND_LHS_TRUE2:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true2: -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP18]] to double +// CHECK3-NEXT: [[TMP23:%.*]] = load float, ptr @_ZZ4tbarIsEiT_S0_S0_S0_E2Sa, align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP23]] to double // CHECK3-NEXT: [[CMP4:%.*]] = fcmp ogt double [[CONV3]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr @Gb, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// 
CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr @Gc, ptr [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr null, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP34]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 -// CHECK3-NEXT: 
store ptr null, ptr [[TMP36]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr @Gd, ptr [[TMP41]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP43]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 8 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP44]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 -// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [9 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr @Gb, ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr @Gc, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], 
i32 0, i32 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr null, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP43]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP45]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr @Gd, ptr [[TMP46]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP47]], align 4 +// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP48]], align 4 +// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 
8 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP49]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 8 +// CHECK3-NEXT: store ptr null, ptr [[TMP50]], align 4 +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 9, ptr [[TMP49]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr 
[[TMP55]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP56]], align 8 -// CHECK3-NEXT: [[TMP57:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP58:%.*]] = icmp ne i32 [[TMP57]], 0 -// CHECK3-NEXT: br i1 [[TMP58]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP53]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 9, ptr [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP51]], ptr [[TMP55]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP52]], ptr [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// 
CHECK3-NEXT: store ptr null, ptr [[TMP60]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP61]], align 8 +// CHECK3-NEXT: [[TMP62:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP63:%.*]] = icmp ne i32 [[TMP62]], 0 +// CHECK3-NEXT: br i1 [[TMP63]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP5]], ptr @Gb, i32 [[TMP7]], ptr @Gc, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr @Gd, i32 [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4tbarIsEiT_S0_S0_S0__l145(i32 [[TMP10]], ptr @Gb, i32 [[TMP12]], ptr @Gc, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr @Gd, i32 [[TMP20]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: ret void @@ -1848,6 +1915,7 @@ // CHECK3-NEXT: [[GB1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GC2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[GD3:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1882,27 +1950,33 @@ // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float // CHECK3-NEXT: store float [[CONV8]], ptr [[SB_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[GC2]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[D_ADDR]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[GD3]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SD_ADDR]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[GC2]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP13]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[C_ADDR]], align 2 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[C_ADDR]], align 2 +// CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[CONV9]], 0 // CHECK3-NEXT: br i1 [[CMP10]], label [[LAND_LHS_TRUE11:%.*]], label [[OMP_IF_ELSE]] // CHECK3: land.lhs.true11: -// CHECK3-NEXT: [[TMP12:%.*]] = load float, ptr [[SC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP12]] to double +// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr 
[[SC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV12:%.*]] = fpext float [[TMP15]] to double // CHECK3-NEXT: [[CMP13:%.*]] = fcmp ogt double [[CONV12]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[D_ADDR]], ptr [[GD3]], ptr [[SD_ADDR]]) #[[ATTR2]] +// CHECK3-NEXT: call void @.omp_outlined..6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1910,34 +1984,34 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[D:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[GD:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SD:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GD_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SD_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GD]], ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SD]], ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SD_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP4]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD2]], 
ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP5]] to double +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[TMP2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP8]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP9]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 // CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float -// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: store float [[CONV5]], ptr [[TMP6]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_data_map_codegen_hold.cpp b/clang/test/OpenMP/target_data_map_codegen_hold.cpp --- a/clang/test/OpenMP/target_data_map_codegen_hold.cpp +++ b/clang/test/OpenMP/target_data_map_codegen_hold.cpp @@ -54,22 +54,26 @@ //. 
// CHECK-PPC64LE: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 20] -// CHECK-PPC64LE: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 [[#0x2001]]] +// CHECK-PPC64LE: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 8193] +// CHECK-PPC64LE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-PPC64LE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 // CHECK-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 20] -// CHECK-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2405]]] +// CHECK-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9221] // CHECK-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 [[#0x2003]]] +// CHECK-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 8195] // CHECK-PPC64LE: @.offload_sizes.5 = private unnamed_addr constant [11 x i64] [i64 0, i64 4, i64 8, i64 8, i64 4, i64 4, i64 0, i64 4, i64 8, i64 8, i64 4] -// CHECK-PPC64LE: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]] +// CHECK-PPC64LE: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 8192, i64 281474976718851, i64 281474976718864, i64 8208, i64 8211, i64 3, i64 8192, i64 1970324836982787, i64 1970324836982800, i64 8208, i64 8211] //. 
// CHECK-I386: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 20] -// CHECK-I386: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 [[#0x2001]]] +// CHECK-I386: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 8193] +// CHECK-I386: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-I386: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 // CHECK-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 20] -// CHECK-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2405]]] +// CHECK-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9221] // CHECK-I386: @.offload_sizes.3 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-I386: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 [[#0x2003]]] +// CHECK-I386: @.offload_maptypes.4 = private unnamed_addr constant [1 x i64] [i64 8195] // CHECK-I386: @.offload_sizes.5 = private unnamed_addr constant [11 x i64] [i64 0, i64 4, i64 4, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4, i64 4, i64 4] -// CHECK-I386: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002010]], i64 [[#0x2010]], i64 [[#0x2013]], i64 [[#0x3]], i64 [[#0x2000]], i64 [[#0x7000000002003]], i64 [[#0x7000000002010]], i64 [[#0x2010]], i64 [[#0x2013]]] +// CHECK-I386: @.offload_maptypes.6 = private unnamed_addr constant [11 x i64] [i64 8192, i64 281474976718851, i64 281474976718864, i64 8208, i64 8211, i64 3, i64 8192, i64 1970324836982787, i64 1970324836982800, i64 8208, i64 8211] //. 
// CHECK-PPC64LE-LABEL: @_Z3fooi( // CHECK-PPC64LE-NEXT: entry: @@ -93,189 +97,189 @@ // CHECK-PPC64LE-NEXT: store i32 [[ARG:%.*]], ptr [[ARG_ADDR]], align 4 // CHECK-PPC64LE-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP0]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP2]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP5]], ptr [[TMP6]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-PPC64LE-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP1]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP3]], ptr [[TMP4]], ptr @.offload_sizes, ptr @.offload_maptypes, 
ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-PPC64LE-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK-PPC64LE-NEXT: store i32 [[INC]], ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP8]], ptr [[TMP9]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP10]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP6]], ptr [[TMP7]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP8]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP9]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP10]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK-PPC64LE-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 
-// CHECK-PPC64LE-NEXT: store ptr [[LB]], ptr [[TMP12]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP15]], ptr [[TMP16]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP11]], ptr [[TMP12]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-PPC64LE-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK-PPC64LE-NEXT: store i32 [[INC4]], ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP18]], ptr [[TMP19]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP20]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr 
[[TMP22]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP25]], ptr [[TMP26]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK-PPC64LE-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP14]], ptr [[TMP15]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP16]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP17]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i64 0, i64 0 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call 
void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP19]], ptr [[TMP20]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-PPC64LE-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK-PPC64LE-NEXT: store i32 [[INC8]], ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP28]], ptr [[TMP29]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP22]], ptr [[TMP23]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP25:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[S:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP25]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP27:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[PS:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP27]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP28:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[PS9:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP28]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP29:%.*]] = load ptr, ptr [[PS9]], align 8 +// CHECK-PPC64LE-NEXT: [[PS10:%.*]] 
= getelementptr inbounds [[STRUCT_S2]], ptr [[TMP29]], i32 0, i32 1 // CHECK-PPC64LE-NEXT: [[TMP30:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: [[S:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP31]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP32:%.*]] = load ptr, ptr [[PS1]], align 8 +// CHECK-PPC64LE-NEXT: [[PS11:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP30]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PS11]], align 8 +// CHECK-PPC64LE-NEXT: [[PS12:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP31]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP32:%.*]] = load ptr, ptr [[PS12]], align 8 +// CHECK-PPC64LE-NEXT: [[PS13:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP32]], i32 0, i32 1 // CHECK-PPC64LE-NEXT: [[TMP33:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: [[PS:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP33]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP34:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: [[PS9:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP34]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP35:%.*]] = load ptr, ptr [[PS9]], align 8 -// CHECK-PPC64LE-NEXT: [[PS10:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP35]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: [[PS11:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP36]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP37:%.*]] = load ptr, ptr [[PS11]], align 8 -// CHECK-PPC64LE-NEXT: [[PS12:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP37]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP38:%.*]] = load ptr, ptr [[PS12]], align 8 -// CHECK-PPC64LE-NEXT: [[PS13:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP38]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP39:%.*]] = load ptr, ptr [[PS1]], align 8 -// CHECK-PPC64LE-NEXT: 
[[PS14:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP39]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP40:%.*]] = load ptr, ptr [[PS14]], align 8 -// CHECK-PPC64LE-NEXT: [[PS15:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP40]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PS15]], align 8 -// CHECK-PPC64LE-NEXT: [[PS16:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP41]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP42:%.*]] = load ptr, ptr [[PS16]], align 8 -// CHECK-PPC64LE-NEXT: [[S17:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP42]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP43:%.*]] = getelementptr ptr, ptr [[PS]], i32 1 -// CHECK-PPC64LE-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK-PPC64LE-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[S]] to i64 -// CHECK-PPC64LE-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] -// CHECK-PPC64LE-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK-PPC64LE-NEXT: [[TMP50:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[PS14:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP33]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP34:%.*]] = load ptr, ptr [[PS14]], align 8 +// CHECK-PPC64LE-NEXT: [[PS15:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP34]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP35:%.*]] = load ptr, ptr [[PS15]], align 8 +// CHECK-PPC64LE-NEXT: [[PS16:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP35]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PS16]], align 8 +// CHECK-PPC64LE-NEXT: [[S17:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP36]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP37:%.*]] = getelementptr ptr, ptr [[PS]], i32 1 +// CHECK-PPC64LE-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 +// CHECK-PPC64LE-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[S]] to i64 +// CHECK-PPC64LE-NEXT: [[TMP40:%.*]] = sub i64 [[TMP38]], 
[[TMP39]] +// CHECK-PPC64LE-NEXT: [[TMP41:%.*]] = sdiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-PPC64LE-NEXT: [[TMP42:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP43:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[S18:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP43]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP44:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP45:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[PS19:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP45]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[PS20:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP46]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP47:%.*]] = load ptr, ptr [[PS20]], align 8 +// CHECK-PPC64LE-NEXT: [[PS21:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP47]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP48:%.*]] = load ptr, ptr [[PS2]], align 8 +// CHECK-PPC64LE-NEXT: [[PS22:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP48]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PS22]], align 8 +// CHECK-PPC64LE-NEXT: [[PS23:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP49]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP50:%.*]] = load ptr, ptr [[PS23]], align 8 +// CHECK-PPC64LE-NEXT: [[PS24:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP50]], i32 0, i32 1 // CHECK-PPC64LE-NEXT: [[TMP51:%.*]] = load ptr, ptr [[PS2]], align 8 -// CHECK-PPC64LE-NEXT: [[S18:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP51]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP52:%.*]] = load ptr, ptr [[PS2]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP53:%.*]] = load ptr, ptr [[PS2]], align 8 -// CHECK-PPC64LE-NEXT: [[PS19:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP53]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP54:%.*]] = load ptr, ptr [[PS2]], align 
8 -// CHECK-PPC64LE-NEXT: [[PS20:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP54]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP55:%.*]] = load ptr, ptr [[PS20]], align 8 -// CHECK-PPC64LE-NEXT: [[PS21:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP55]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP56:%.*]] = load ptr, ptr [[PS2]], align 8 -// CHECK-PPC64LE-NEXT: [[PS22:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP56]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP57:%.*]] = load ptr, ptr [[PS22]], align 8 -// CHECK-PPC64LE-NEXT: [[PS23:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP57]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP58:%.*]] = load ptr, ptr [[PS23]], align 8 -// CHECK-PPC64LE-NEXT: [[PS24:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP58]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP59:%.*]] = load ptr, ptr [[PS2]], align 8 -// CHECK-PPC64LE-NEXT: [[PS25:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP59]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP60:%.*]] = load ptr, ptr [[PS25]], align 8 -// CHECK-PPC64LE-NEXT: [[PS26:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP60]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP61:%.*]] = load ptr, ptr [[PS26]], align 8 -// CHECK-PPC64LE-NEXT: [[PS27:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP61]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: [[TMP62:%.*]] = load ptr, ptr [[PS27]], align 8 -// CHECK-PPC64LE-NEXT: [[S28:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP62]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP63:%.*]] = getelementptr ptr, ptr [[PS19]], i32 1 -// CHECK-PPC64LE-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP63]] to i64 -// CHECK-PPC64LE-NEXT: [[TMP67:%.*]] = ptrtoint ptr [[S18]] to i64 -// CHECK-PPC64LE-NEXT: [[TMP68:%.*]] = sub i64 [[TMP66]], [[TMP67]] -// CHECK-PPC64LE-NEXT: [[TMP69:%.*]] = sdiv exact i64 [[TMP68]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-PPC64LE-NEXT: [[PS25:%.*]] = getelementptr inbounds 
[[STRUCT_S2]], ptr [[TMP51]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP52:%.*]] = load ptr, ptr [[PS25]], align 8 +// CHECK-PPC64LE-NEXT: [[PS26:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP52]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP53:%.*]] = load ptr, ptr [[PS26]], align 8 +// CHECK-PPC64LE-NEXT: [[PS27:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP53]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: [[TMP54:%.*]] = load ptr, ptr [[PS27]], align 8 +// CHECK-PPC64LE-NEXT: [[S28:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP54]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP55:%.*]] = getelementptr ptr, ptr [[PS19]], i32 1 +// CHECK-PPC64LE-NEXT: [[TMP56:%.*]] = ptrtoint ptr [[TMP55]] to i64 +// CHECK-PPC64LE-NEXT: [[TMP57:%.*]] = ptrtoint ptr [[S18]] to i64 +// CHECK-PPC64LE-NEXT: [[TMP58:%.*]] = sub i64 [[TMP56]], [[TMP57]] +// CHECK-PPC64LE-NEXT: [[TMP59:%.*]] = sdiv exact i64 [[TMP58]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK-PPC64LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.5, i64 88, i1 false) -// CHECK-PPC64LE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[TMP30]], ptr [[TMP71]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store ptr [[S]], ptr [[TMP73]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: store i64 [[TMP49]], ptr [[TMP75]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP76]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: store 
ptr [[TMP30]], ptr [[TMP77]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP79:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 1 -// CHECK-PPC64LE-NEXT: store ptr [[S]], ptr [[TMP79]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP81]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP82:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2 -// CHECK-PPC64LE-NEXT: store ptr [[PS]], ptr [[TMP82]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP84:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 2 -// CHECK-PPC64LE-NEXT: store ptr [[PS10]], ptr [[TMP84]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP86]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3 -// CHECK-PPC64LE-NEXT: store ptr [[PS10]], ptr [[TMP87]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP89:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 3 -// CHECK-PPC64LE-NEXT: store ptr [[PS13]], ptr [[TMP89]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP91:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3 +// CHECK-PPC64LE-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[TMP24]], ptr [[TMP60]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store ptr [[S]], ptr [[TMP61]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP62:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: store i64 [[TMP41]], ptr [[TMP62]], align 8 +// 
CHECK-PPC64LE-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 0 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: store ptr [[TMP24]], ptr [[TMP64]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 1 +// CHECK-PPC64LE-NEXT: store ptr [[S]], ptr [[TMP65]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP66:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 1 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP67:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2 +// CHECK-PPC64LE-NEXT: store ptr [[PS]], ptr [[TMP67]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP68:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 2 +// CHECK-PPC64LE-NEXT: store ptr [[PS10]], ptr [[TMP68]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP69:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 2 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP69]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP70:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3 +// CHECK-PPC64LE-NEXT: store ptr [[PS10]], ptr [[TMP70]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 3 +// CHECK-PPC64LE-NEXT: store ptr [[PS13]], ptr [[TMP71]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP72:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 3 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP72]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4 +// CHECK-PPC64LE-NEXT: store ptr [[PS13]], ptr [[TMP73]], 
align 8 +// CHECK-PPC64LE-NEXT: [[TMP74:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 4 +// CHECK-PPC64LE-NEXT: store ptr [[S17]], ptr [[TMP74]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP75]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5 +// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP76]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP77:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 5 +// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP77]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP78:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP79:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6 +// CHECK-PPC64LE-NEXT: store ptr [[TMP42]], ptr [[TMP79]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP80:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 6 +// CHECK-PPC64LE-NEXT: store ptr [[S18]], ptr [[TMP80]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 6 +// CHECK-PPC64LE-NEXT: store i64 [[TMP59]], ptr [[TMP81]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP82:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP82]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP83:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7 +// CHECK-PPC64LE-NEXT: store ptr [[TMP42]], ptr [[TMP83]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP84:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 7 +// CHECK-PPC64LE-NEXT: store ptr 
[[S18]], ptr [[TMP84]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP85:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP85]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8 +// CHECK-PPC64LE-NEXT: store ptr [[PS19]], ptr [[TMP86]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 8 +// CHECK-PPC64LE-NEXT: store ptr [[PS21]], ptr [[TMP87]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP88:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP88]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP89:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9 +// CHECK-PPC64LE-NEXT: store ptr [[PS21]], ptr [[TMP89]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP90:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 9 +// CHECK-PPC64LE-NEXT: store ptr [[PS24]], ptr [[TMP90]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP91:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9 // CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP91]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP92:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4 -// CHECK-PPC64LE-NEXT: store ptr [[PS13]], ptr [[TMP92]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP94:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 4 -// CHECK-PPC64LE-NEXT: store ptr [[S17]], ptr [[TMP94]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP96:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 4 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP96]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP97:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5 -// 
CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP97]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 5 -// CHECK-PPC64LE-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP99]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 5 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP101]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP102:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6 -// CHECK-PPC64LE-NEXT: store ptr [[TMP50]], ptr [[TMP102]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP104:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 6 -// CHECK-PPC64LE-NEXT: store ptr [[S18]], ptr [[TMP104]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP106:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 6 -// CHECK-PPC64LE-NEXT: store i64 [[TMP69]], ptr [[TMP106]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP107:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 6 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP107]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP108:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7 -// CHECK-PPC64LE-NEXT: store ptr [[TMP50]], ptr [[TMP108]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP110:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 7 -// CHECK-PPC64LE-NEXT: store ptr [[S18]], ptr [[TMP110]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP112:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 7 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP112]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP113:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8 -// CHECK-PPC64LE-NEXT: store ptr [[PS19]], ptr [[TMP113]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP115:%.*]] = getelementptr inbounds [11 x ptr], ptr 
[[DOTOFFLOAD_PTRS30]], i32 0, i32 8 -// CHECK-PPC64LE-NEXT: store ptr [[PS21]], ptr [[TMP115]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP117:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 8 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP117]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP118:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9 -// CHECK-PPC64LE-NEXT: store ptr [[PS21]], ptr [[TMP118]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP120:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 9 -// CHECK-PPC64LE-NEXT: store ptr [[PS24]], ptr [[TMP120]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP122:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 9 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP122]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10 -// CHECK-PPC64LE-NEXT: store ptr [[PS24]], ptr [[TMP123]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 10 -// CHECK-PPC64LE-NEXT: store ptr [[S28]], ptr [[TMP125]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP127:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 10 -// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP127]], align 8 -// CHECK-PPC64LE-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP129:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP128]], ptr [[TMP129]], ptr [[TMP130]], ptr @.offload_maptypes.6, ptr null, ptr null) -// CHECK-PPC64LE-NEXT: [[TMP131:%.*]] = load i32, ptr 
[[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[INC32:%.*]] = add nsw i32 [[TMP131]], 1 +// CHECK-PPC64LE-NEXT: [[TMP92:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10 +// CHECK-PPC64LE-NEXT: store ptr [[PS24]], ptr [[TMP92]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP93:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 10 +// CHECK-PPC64LE-NEXT: store ptr [[S28]], ptr [[TMP93]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP94:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i64 0, i64 10 +// CHECK-PPC64LE-NEXT: store ptr null, ptr [[TMP94]], align 8 +// CHECK-PPC64LE-NEXT: [[TMP95:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP96:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP97:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP95]], ptr [[TMP96]], ptr [[TMP97]], ptr @.offload_maptypes.6, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-PPC64LE-NEXT: [[INC32:%.*]] = add nsw i32 [[TMP98]], 1 // CHECK-PPC64LE-NEXT: store i32 [[INC32]], ptr [[ARG_ADDR]], align 4 -// CHECK-PPC64LE-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: [[TMP134:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP132]], ptr [[TMP133]], ptr [[TMP134]], ptr @.offload_maptypes.6, ptr null, ptr null) +// CHECK-PPC64LE-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP100:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-PPC64LE-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP99]], ptr [[TMP100]], ptr [[TMP101]], ptr @.offload_maptypes.6, ptr null, ptr null) // CHECK-PPC64LE-NEXT: ret void // // CHECK-I386-LABEL: @_Z3fooi( @@ -300,189 +304,189 @@ // CHECK-I386-NEXT: store i32 [[ARG:%.*]], ptr [[ARG_ADDR]], align 4 // CHECK-I386-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP0]], align 4 -// CHECK-I386-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP2]], align 4 -// CHECK-I386-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK-I386-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP5]], ptr [[TMP6]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) -// CHECK-I386-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK-I386-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP1]], align 4 +// CHECK-I386-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr 
null, ptr [[TMP2]], align 4 +// CHECK-I386-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, ptr [[TMP3]], ptr [[TMP4]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-I386-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 // CHECK-I386-NEXT: store i32 [[INC]], ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP8]], ptr [[TMP9]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) -// CHECK-I386-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP10]], align 4 +// CHECK-I386-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP6]], ptr [[TMP7]], ptr @.offload_sizes, ptr @.offload_maptypes, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP8]], align 4 +// CHECK-I386-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP9]], align 4 +// CHECK-I386-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP10]], align 4 +// CHECK-I386-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK-I386-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[LB]], ptr [[TMP12]], align 4 -// CHECK-I386-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK-I386-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP15]], ptr [[TMP16]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) -// CHECK-I386-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP11]], ptr [[TMP12]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-I386-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK-I386-NEXT: store i32 [[INC4]], ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP18]], ptr [[TMP19]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) -// CHECK-I386-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP20]], align 4 -// CHECK-I386-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP22]], align 4 -// CHECK-I386-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK-I386-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP25]], ptr [[TMP26]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) -// CHECK-I386-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK-I386-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP14]], ptr [[TMP15]], ptr @.offload_sizes.1, ptr @.offload_maptypes.2, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP16]], align 4 +// CHECK-I386-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP17]], align 4 +// CHECK-I386-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS7]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr null, ptr 
[[TMP18]], align 4 +// CHECK-I386-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP19]], ptr [[TMP20]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-I386-NEXT: [[INC8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK-I386-NEXT: store i32 [[INC8]], ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP28]], ptr [[TMP29]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 1, ptr [[TMP22]], ptr [[TMP23]], ptr @.offload_sizes.3, ptr @.offload_maptypes.4, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP24:%.*]] = load ptr, ptr [[PS1]], align 4 +// CHECK-I386-NEXT: [[TMP25:%.*]] = load ptr, ptr [[PS1]], align 4 +// CHECK-I386-NEXT: [[S:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP25]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP26:%.*]] = load ptr, ptr [[PS1]], align 4 +// CHECK-I386-NEXT: [[TMP27:%.*]] = load ptr, ptr [[PS1]], align 4 +// CHECK-I386-NEXT: [[PS:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP27]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP28:%.*]] = load ptr, ptr 
[[PS1]], align 4 +// CHECK-I386-NEXT: [[PS9:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP28]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP29:%.*]] = load ptr, ptr [[PS9]], align 4 +// CHECK-I386-NEXT: [[PS10:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP29]], i32 0, i32 1 // CHECK-I386-NEXT: [[TMP30:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[S:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[TMP31]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP32:%.*]] = load ptr, ptr [[PS1]], align 4 +// CHECK-I386-NEXT: [[PS11:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP30]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP31:%.*]] = load ptr, ptr [[PS11]], align 4 +// CHECK-I386-NEXT: [[PS12:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP31]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP32:%.*]] = load ptr, ptr [[PS12]], align 4 +// CHECK-I386-NEXT: [[PS13:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP32]], i32 0, i32 1 // CHECK-I386-NEXT: [[TMP33:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[PS:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP33]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP34:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[PS9:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP34]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP35:%.*]] = load ptr, ptr [[PS9]], align 4 -// CHECK-I386-NEXT: [[PS10:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP35]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[PS11:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP36]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP37:%.*]] = load ptr, ptr [[PS11]], align 4 -// CHECK-I386-NEXT: [[PS12:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP37]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP38:%.*]] = load ptr, ptr [[PS12]], align 4 -// CHECK-I386-NEXT: [[PS13:%.*]] = 
getelementptr inbounds [[STRUCT_S2]], ptr [[TMP38]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP39:%.*]] = load ptr, ptr [[PS1]], align 4 -// CHECK-I386-NEXT: [[PS14:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP39]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP40:%.*]] = load ptr, ptr [[PS14]], align 4 -// CHECK-I386-NEXT: [[PS15:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP40]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP41:%.*]] = load ptr, ptr [[PS15]], align 4 -// CHECK-I386-NEXT: [[PS16:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP41]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP42:%.*]] = load ptr, ptr [[PS16]], align 4 -// CHECK-I386-NEXT: [[S17:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP42]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP43:%.*]] = getelementptr ptr, ptr [[PS]], i32 1 -// CHECK-I386-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK-I386-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[S]] to i64 -// CHECK-I386-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] -// CHECK-I386-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK-I386-NEXT: [[TMP50:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[PS14:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP33]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP34:%.*]] = load ptr, ptr [[PS14]], align 4 +// CHECK-I386-NEXT: [[PS15:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP34]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP35:%.*]] = load ptr, ptr [[PS15]], align 4 +// CHECK-I386-NEXT: [[PS16:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP35]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP36:%.*]] = load ptr, ptr [[PS16]], align 4 +// CHECK-I386-NEXT: [[S17:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP36]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP37:%.*]] = getelementptr ptr, ptr [[PS]], i32 1 +// CHECK-I386-NEXT: [[TMP38:%.*]] = ptrtoint ptr [[TMP37]] to i64 +// CHECK-I386-NEXT: [[TMP39:%.*]] = 
ptrtoint ptr [[S]] to i64 +// CHECK-I386-NEXT: [[TMP40:%.*]] = sub i64 [[TMP38]], [[TMP39]] +// CHECK-I386-NEXT: [[TMP41:%.*]] = sdiv exact i64 [[TMP40]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-I386-NEXT: [[TMP42:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[TMP43:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[S18:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP43]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP44:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[TMP45:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[PS19:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP45]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP46:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[PS20:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP46]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP47:%.*]] = load ptr, ptr [[PS20]], align 4 +// CHECK-I386-NEXT: [[PS21:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP47]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP48:%.*]] = load ptr, ptr [[PS2]], align 4 +// CHECK-I386-NEXT: [[PS22:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP48]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP49:%.*]] = load ptr, ptr [[PS22]], align 4 +// CHECK-I386-NEXT: [[PS23:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP49]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP50:%.*]] = load ptr, ptr [[PS23]], align 4 +// CHECK-I386-NEXT: [[PS24:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP50]], i32 0, i32 1 // CHECK-I386-NEXT: [[TMP51:%.*]] = load ptr, ptr [[PS2]], align 4 -// CHECK-I386-NEXT: [[S18:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP51]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP52:%.*]] = load ptr, ptr [[PS2]], align 4 -// CHECK-I386-NEXT: [[TMP53:%.*]] = load ptr, ptr [[PS2]], align 4 -// CHECK-I386-NEXT: [[PS19:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP53]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP54:%.*]] = load ptr, ptr 
[[PS2]], align 4 -// CHECK-I386-NEXT: [[PS20:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP54]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP55:%.*]] = load ptr, ptr [[PS20]], align 4 -// CHECK-I386-NEXT: [[PS21:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP55]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP56:%.*]] = load ptr, ptr [[PS2]], align 4 -// CHECK-I386-NEXT: [[PS22:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP56]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP57:%.*]] = load ptr, ptr [[PS22]], align 4 -// CHECK-I386-NEXT: [[PS23:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP57]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP58:%.*]] = load ptr, ptr [[PS23]], align 4 -// CHECK-I386-NEXT: [[PS24:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP58]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP59:%.*]] = load ptr, ptr [[PS2]], align 4 -// CHECK-I386-NEXT: [[PS25:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP59]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP60:%.*]] = load ptr, ptr [[PS25]], align 4 -// CHECK-I386-NEXT: [[PS26:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP60]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP61:%.*]] = load ptr, ptr [[PS26]], align 4 -// CHECK-I386-NEXT: [[PS27:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP61]], i32 0, i32 1 -// CHECK-I386-NEXT: [[TMP62:%.*]] = load ptr, ptr [[PS27]], align 4 -// CHECK-I386-NEXT: [[S28:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP62]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP63:%.*]] = getelementptr ptr, ptr [[PS19]], i32 1 -// CHECK-I386-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP63]] to i64 -// CHECK-I386-NEXT: [[TMP67:%.*]] = ptrtoint ptr [[S18]] to i64 -// CHECK-I386-NEXT: [[TMP68:%.*]] = sub i64 [[TMP66]], [[TMP67]] -// CHECK-I386-NEXT: [[TMP69:%.*]] = sdiv exact i64 [[TMP68]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK-I386-NEXT: [[PS25:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP51]], i32 0, i32 1 +// 
CHECK-I386-NEXT: [[TMP52:%.*]] = load ptr, ptr [[PS25]], align 4 +// CHECK-I386-NEXT: [[PS26:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP52]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP53:%.*]] = load ptr, ptr [[PS26]], align 4 +// CHECK-I386-NEXT: [[PS27:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP53]], i32 0, i32 1 +// CHECK-I386-NEXT: [[TMP54:%.*]] = load ptr, ptr [[PS27]], align 4 +// CHECK-I386-NEXT: [[S28:%.*]] = getelementptr inbounds [[STRUCT_S2]], ptr [[TMP54]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP55:%.*]] = getelementptr ptr, ptr [[PS19]], i32 1 +// CHECK-I386-NEXT: [[TMP56:%.*]] = ptrtoint ptr [[TMP55]] to i64 +// CHECK-I386-NEXT: [[TMP57:%.*]] = ptrtoint ptr [[S18]] to i64 +// CHECK-I386-NEXT: [[TMP58:%.*]] = sub i64 [[TMP56]], [[TMP57]] +// CHECK-I386-NEXT: [[TMP59:%.*]] = sdiv exact i64 [[TMP58]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK-I386-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.5, i32 88, i1 false) -// CHECK-I386-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[TMP30]], ptr [[TMP71]], align 4 -// CHECK-I386-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr [[S]], ptr [[TMP73]], align 4 -// CHECK-I386-NEXT: [[TMP75:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-I386-NEXT: store i64 [[TMP49]], ptr [[TMP75]], align 4 -// CHECK-I386-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 0 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP76]], align 4 -// CHECK-I386-NEXT: [[TMP77:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1 -// CHECK-I386-NEXT: store ptr [[TMP30]], ptr [[TMP77]], align 4 -// CHECK-I386-NEXT: [[TMP79:%.*]] = getelementptr inbounds [11 x ptr], 
ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 1 -// CHECK-I386-NEXT: store ptr [[S]], ptr [[TMP79]], align 4 -// CHECK-I386-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 1 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP81]], align 4 -// CHECK-I386-NEXT: [[TMP82:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2 -// CHECK-I386-NEXT: store ptr [[PS]], ptr [[TMP82]], align 4 -// CHECK-I386-NEXT: [[TMP84:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 2 -// CHECK-I386-NEXT: store ptr [[PS10]], ptr [[TMP84]], align 4 -// CHECK-I386-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 2 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP86]], align 4 -// CHECK-I386-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3 -// CHECK-I386-NEXT: store ptr [[PS10]], ptr [[TMP87]], align 4 -// CHECK-I386-NEXT: [[TMP89:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 3 -// CHECK-I386-NEXT: store ptr [[PS13]], ptr [[TMP89]], align 4 -// CHECK-I386-NEXT: [[TMP91:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 3 +// CHECK-I386-NEXT: [[TMP60:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[TMP24]], ptr [[TMP60]], align 4 +// CHECK-I386-NEXT: [[TMP61:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr [[S]], ptr [[TMP61]], align 4 +// CHECK-I386-NEXT: [[TMP62:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-I386-NEXT: store i64 [[TMP41]], ptr [[TMP62]], align 4 +// CHECK-I386-NEXT: [[TMP63:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 0 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP63]], align 4 +// 
CHECK-I386-NEXT: [[TMP64:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 1 +// CHECK-I386-NEXT: store ptr [[TMP24]], ptr [[TMP64]], align 4 +// CHECK-I386-NEXT: [[TMP65:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 1 +// CHECK-I386-NEXT: store ptr [[S]], ptr [[TMP65]], align 4 +// CHECK-I386-NEXT: [[TMP66:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 1 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP66]], align 4 +// CHECK-I386-NEXT: [[TMP67:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 2 +// CHECK-I386-NEXT: store ptr [[PS]], ptr [[TMP67]], align 4 +// CHECK-I386-NEXT: [[TMP68:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 2 +// CHECK-I386-NEXT: store ptr [[PS10]], ptr [[TMP68]], align 4 +// CHECK-I386-NEXT: [[TMP69:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 2 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP69]], align 4 +// CHECK-I386-NEXT: [[TMP70:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 3 +// CHECK-I386-NEXT: store ptr [[PS10]], ptr [[TMP70]], align 4 +// CHECK-I386-NEXT: [[TMP71:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 3 +// CHECK-I386-NEXT: store ptr [[PS13]], ptr [[TMP71]], align 4 +// CHECK-I386-NEXT: [[TMP72:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 3 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP72]], align 4 +// CHECK-I386-NEXT: [[TMP73:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4 +// CHECK-I386-NEXT: store ptr [[PS13]], ptr [[TMP73]], align 4 +// CHECK-I386-NEXT: [[TMP74:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 4 +// CHECK-I386-NEXT: store ptr [[S17]], ptr [[TMP74]], align 4 +// CHECK-I386-NEXT: [[TMP75:%.*]] = getelementptr inbounds 
[11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 4 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP75]], align 4 +// CHECK-I386-NEXT: [[TMP76:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5 +// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP76]], align 4 +// CHECK-I386-NEXT: [[TMP77:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 5 +// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP77]], align 4 +// CHECK-I386-NEXT: [[TMP78:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 5 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP78]], align 4 +// CHECK-I386-NEXT: [[TMP79:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6 +// CHECK-I386-NEXT: store ptr [[TMP42]], ptr [[TMP79]], align 4 +// CHECK-I386-NEXT: [[TMP80:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 6 +// CHECK-I386-NEXT: store ptr [[S18]], ptr [[TMP80]], align 4 +// CHECK-I386-NEXT: [[TMP81:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 6 +// CHECK-I386-NEXT: store i64 [[TMP59]], ptr [[TMP81]], align 4 +// CHECK-I386-NEXT: [[TMP82:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 6 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP82]], align 4 +// CHECK-I386-NEXT: [[TMP83:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7 +// CHECK-I386-NEXT: store ptr [[TMP42]], ptr [[TMP83]], align 4 +// CHECK-I386-NEXT: [[TMP84:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 7 +// CHECK-I386-NEXT: store ptr [[S18]], ptr [[TMP84]], align 4 +// CHECK-I386-NEXT: [[TMP85:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 7 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP85]], align 4 +// CHECK-I386-NEXT: [[TMP86:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 
0, i32 8 +// CHECK-I386-NEXT: store ptr [[PS19]], ptr [[TMP86]], align 4 +// CHECK-I386-NEXT: [[TMP87:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 8 +// CHECK-I386-NEXT: store ptr [[PS21]], ptr [[TMP87]], align 4 +// CHECK-I386-NEXT: [[TMP88:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 8 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP88]], align 4 +// CHECK-I386-NEXT: [[TMP89:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9 +// CHECK-I386-NEXT: store ptr [[PS21]], ptr [[TMP89]], align 4 +// CHECK-I386-NEXT: [[TMP90:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 9 +// CHECK-I386-NEXT: store ptr [[PS24]], ptr [[TMP90]], align 4 +// CHECK-I386-NEXT: [[TMP91:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 9 // CHECK-I386-NEXT: store ptr null, ptr [[TMP91]], align 4 -// CHECK-I386-NEXT: [[TMP92:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 4 -// CHECK-I386-NEXT: store ptr [[PS13]], ptr [[TMP92]], align 4 -// CHECK-I386-NEXT: [[TMP94:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 4 -// CHECK-I386-NEXT: store ptr [[S17]], ptr [[TMP94]], align 4 -// CHECK-I386-NEXT: [[TMP96:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 4 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP96]], align 4 -// CHECK-I386-NEXT: [[TMP97:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 5 -// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP97]], align 4 -// CHECK-I386-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 5 -// CHECK-I386-NEXT: store ptr [[ARG_ADDR]], ptr [[TMP99]], align 4 -// CHECK-I386-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 5 -// CHECK-I386-NEXT: store ptr 
null, ptr [[TMP101]], align 4 -// CHECK-I386-NEXT: [[TMP102:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 6 -// CHECK-I386-NEXT: store ptr [[TMP50]], ptr [[TMP102]], align 4 -// CHECK-I386-NEXT: [[TMP104:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 6 -// CHECK-I386-NEXT: store ptr [[S18]], ptr [[TMP104]], align 4 -// CHECK-I386-NEXT: [[TMP106:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 6 -// CHECK-I386-NEXT: store i64 [[TMP69]], ptr [[TMP106]], align 4 -// CHECK-I386-NEXT: [[TMP107:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 6 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP107]], align 4 -// CHECK-I386-NEXT: [[TMP108:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 7 -// CHECK-I386-NEXT: store ptr [[TMP50]], ptr [[TMP108]], align 4 -// CHECK-I386-NEXT: [[TMP110:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 7 -// CHECK-I386-NEXT: store ptr [[S18]], ptr [[TMP110]], align 4 -// CHECK-I386-NEXT: [[TMP112:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 7 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP112]], align 4 -// CHECK-I386-NEXT: [[TMP113:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 8 -// CHECK-I386-NEXT: store ptr [[PS19]], ptr [[TMP113]], align 4 -// CHECK-I386-NEXT: [[TMP115:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 8 -// CHECK-I386-NEXT: store ptr [[PS21]], ptr [[TMP115]], align 4 -// CHECK-I386-NEXT: [[TMP117:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 8 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP117]], align 4 -// CHECK-I386-NEXT: [[TMP118:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 9 -// CHECK-I386-NEXT: store ptr [[PS21]], ptr [[TMP118]], align 
4 -// CHECK-I386-NEXT: [[TMP120:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 9 -// CHECK-I386-NEXT: store ptr [[PS24]], ptr [[TMP120]], align 4 -// CHECK-I386-NEXT: [[TMP122:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 9 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP122]], align 4 -// CHECK-I386-NEXT: [[TMP123:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10 -// CHECK-I386-NEXT: store ptr [[PS24]], ptr [[TMP123]], align 4 -// CHECK-I386-NEXT: [[TMP125:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 10 -// CHECK-I386-NEXT: store ptr [[S28]], ptr [[TMP125]], align 4 -// CHECK-I386-NEXT: [[TMP127:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_MAPPERS31]], i32 0, i32 10 -// CHECK-I386-NEXT: store ptr null, ptr [[TMP127]], align 4 -// CHECK-I386-NEXT: [[TMP128:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP129:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP130:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP128]], ptr [[TMP129]], ptr [[TMP130]], ptr @.offload_maptypes.6, ptr null, ptr null) -// CHECK-I386-NEXT: [[TMP131:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[INC32:%.*]] = add nsw i32 [[TMP131]], 1 +// CHECK-I386-NEXT: [[TMP92:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 10 +// CHECK-I386-NEXT: store ptr [[PS24]], ptr [[TMP92]], align 4 +// CHECK-I386-NEXT: [[TMP93:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 10 +// CHECK-I386-NEXT: store ptr [[S28]], ptr [[TMP93]], align 4 +// CHECK-I386-NEXT: [[TMP94:%.*]] = getelementptr inbounds [11 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS31]], i32 0, i32 10 +// CHECK-I386-NEXT: store ptr null, ptr [[TMP94]], align 4 +// CHECK-I386-NEXT: [[TMP95:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP96:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP97:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP95]], ptr [[TMP96]], ptr [[TMP97]], ptr @.offload_maptypes.6, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK-I386-NEXT: [[INC32:%.*]] = add nsw i32 [[TMP98]], 1 // CHECK-I386-NEXT: store i32 [[INC32]], ptr [[ARG_ADDR]], align 4 -// CHECK-I386-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 -// CHECK-I386-NEXT: [[TMP134:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP132]], ptr [[TMP133]], ptr [[TMP134]], ptr @.offload_maptypes.6, ptr null, ptr null) +// CHECK-I386-NEXT: [[TMP99:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP100:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 +// CHECK-I386-NEXT: [[TMP101:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-I386-NEXT: call void @__tgt_target_data_end_mapper(ptr @[[GLOB1]], i64 -1, i32 11, ptr [[TMP99]], ptr [[TMP100]], ptr [[TMP101]], ptr @.offload_maptypes.6, ptr null, ptr null) // CHECK-I386-NEXT: ret void // void foo(int arg) { @@ -510,3 +514,21 @@ } #endif +//. 
+// CHECK-PPC64LE: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +// CHECK-PPC64LE: attributes #1 = { nounwind } +// CHECK-PPC64LE: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +//. +// CHECK-I386: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// CHECK-I386: attributes #1 = { nounwind } +// CHECK-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +//. +// CHECK-PPC64LE: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK-PPC64LE: !1 = !{i32 7, !"openmp", i32 50} +// CHECK-PPC64LE: !2 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-I386: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +// CHECK-I386: !1 = !{i32 1, !"wchar_size", i32 4} +// CHECK-I386: !2 = !{i32 7, !"openmp", i32 50} +// CHECK-I386: !3 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
diff --git a/clang/test/OpenMP/target_globals_codegen.cpp b/clang/test/OpenMP/target_globals_codegen.cpp --- a/clang/test/OpenMP/target_globals_codegen.cpp +++ b/clang/test/OpenMP/target_globals_codegen.cpp @@ -21,48 +21,70 @@ // CHECK: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. // CHECK-EQ: @__omp_rtl_debug_kind = weak_odr hidden constant i32 111 // CHECK-EQ: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-EQ: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-EQ: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-EQ: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-EQ: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-EQ: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. 
// CHECK-DEFAULT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-DEFAULT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-DEFAULT: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-DEFAULT: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-DEFAULT: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-DEFAULT: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. // CHECK-THREADS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 1 // CHECK-THREADS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-THREADS: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-THREADS: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-THREADS: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-THREADS: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-THREADS: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. 
// CHECK-TEAMS: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 // CHECK-TEAMS: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-TEAMS: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-TEAMS: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-TEAMS: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-TEAMS: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-TEAMS: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. // CHECK-STATE: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-STATE: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1 // CHECK-STATE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-STATE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-STATE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-STATE: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-STATE: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. 
// CHECK-NESTED: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 // CHECK-NESTED: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 1 -//. -// CHECK-RUNTIME-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 0 -// CHECK-RUNTIME-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0 +// CHECK-NESTED: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-NESTED: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +// CHECK-NESTED: @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode = weak protected constant i8 1 +// CHECK-NESTED: @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd00_2691bff__Z3foov_l68_exec_mode], section "llvm.metadata" //. void foo() { #pragma omp target @@ -70,3 +92,72 @@ } #endif +//. +// CHECK: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-EQ: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. 
+// CHECK-DEFAULT: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-THREADS: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-TEAMS: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-STATE: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK-NESTED: attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="none" "kernel" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32,+sm_20" } +//. +// CHECK: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK: !3 = !{i32 7, !"openmp", i32 50} +// CHECK: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
+// CHECK-EQ: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-EQ: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-EQ: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-EQ: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-EQ: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-EQ: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-DEFAULT: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-DEFAULT: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-DEFAULT: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-DEFAULT: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-DEFAULT: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-DEFAULT: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-THREADS: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-THREADS: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-THREADS: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-THREADS: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-THREADS: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-THREADS: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-TEAMS: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-TEAMS: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-TEAMS: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-TEAMS: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-TEAMS: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-TEAMS: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
+// CHECK-STATE: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-STATE: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-STATE: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-STATE: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-STATE: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-STATE: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-NESTED: !0 = !{i32 0, i32 64768, i32 40442879, !"_Z3foov", i32 68, i32 0, i32 0} +// CHECK-NESTED: !1 = !{ptr @__omp_offloading_fd00_2691bff__Z3foov_l68, !"kernel", i32 1} +// CHECK-NESTED: !2 = !{i32 1, !"wchar_size", i32 4} +// CHECK-NESTED: !3 = !{i32 7, !"openmp", i32 50} +// CHECK-NESTED: !4 = !{i32 7, !"openmp-device", i32 50} +// CHECK-NESTED: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-RUNTIME: !0 = !{i32 1, !"wchar_size", i32 4} +// CHECK-RUNTIME: !1 = !{i32 7, !"openmp", i32 50} +// CHECK-RUNTIME: !2 = !{i32 7, !"openmp-device", i32 50} +// CHECK-RUNTIME: !3 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp b/clang/test/OpenMP/target_has_device_addr_codegen.cpp --- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -350,184 +350,184 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.5, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l147(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 
1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.7, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], 
align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l149(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[RAA]], align 8 -// CHECK-NEXT: store ptr [[TMP52]], ptr [[_TMP13]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = load ptr, ptr [[_TMP13]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP55]], align 8 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP53]], ptr [[TMP56]], align 8 -// CHECK-NEXT: 
[[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP57]], align 8 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = load ptr, ptr [[RAA]], align 8 +// CHECK-NEXT: store ptr [[TMP51]], ptr [[_TMP13]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = load ptr, ptr [[_TMP13]], align 8 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP52]], ptr [[TMP54]], align 8 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS16]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS17:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP60]], align 4 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP61]], align 4 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP58]], ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS17]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP59]], ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP66]], align 8 -// CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP67]], align 8 -// CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP68]], align 8 -// CHECK-NEXT: [[TMP69:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]]) -// CHECK-NEXT: [[TMP70:%.*]] = icmp ne i32 [[TMP69]], 0 -// CHECK-NEXT: br i1 [[TMP70]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP59]], align 4 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP57]], ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.9, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP63]], align 8 +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP65]], align 8 +// CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP66]], align 8 +// CHECK-NEXT: [[TMP67:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151.region_id, ptr [[KERNEL_ARGS17]]) +// CHECK-NEXT: [[TMP68:%.*]] = icmp ne i32 [[TMP67]], 0 +// CHECK-NEXT: br i1 [[TMP68]], label [[OMP_OFFLOAD_FAILED18:%.*]], label [[OMP_OFFLOAD_CONT19:%.*]] // CHECK: omp_offload.failed18: -// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP53]]) #[[ATTR5]] +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l151(ptr [[TMP52]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT19]] // CHECK: omp_offload.cont19: -// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP71]], align 8 -// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, 
i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP72]], align 8 -// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP73]], align 8 -// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 -// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP69]], align 8 +// CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP70]], align 8 +// CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP71]], align 8 +// CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP76]], align 4 -// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP77]], align 4 -// CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP74]], ptr [[TMP78]], align 8 -// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP75]], ptr [[TMP79]], align 8 -// CHECK-NEXT: [[TMP80:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP80]], align 8 -// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP81]], align 8 -// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP82]], align 8 -// CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP83]], align 8 -// CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP84]], align 8 -// CHECK-NEXT: [[TMP85:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]]) -// CHECK-NEXT: [[TMP86:%.*]] = icmp ne i32 [[TMP85]], 0 -// CHECK-NEXT: br i1 [[TMP86]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP74]], align 4 +// CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP75]], align 4 +// CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP72]], ptr [[TMP76]], align 8 +// CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP73]], ptr [[TMP77]], align 8 +// CHECK-NEXT: 
[[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.11, ptr [[TMP78]], align 8 +// CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP79]], align 8 +// CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP80]], align 8 +// CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP81]], align 8 +// CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP82]], align 8 +// CHECK-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153.region_id, ptr [[KERNEL_ARGS23]]) +// CHECK-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CHECK-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] // CHECK: omp_offload.failed24: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l153(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT25]] // CHECK: omp_offload.cont25: -// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[DA]], ptr [[TMP87]], align 8 -// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[DA]], ptr [[TMP88]], align 8 -// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP89]], align 8 -// CHECK-NEXT: [[TMP90:%.*]] 
= getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 -// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP85]], align 8 +// CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[DA]], ptr [[TMP86]], align 8 +// CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS28]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP87]], align 8 +// CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 +// CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS29:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 -// CHECK-NEXT: store i32 1, ptr [[TMP92]], align 4 -// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP93]], align 4 -// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 -// CHECK-NEXT: store ptr [[TMP90]], ptr [[TMP94]], align 8 -// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP91]], ptr [[TMP95]], align 8 -// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP96]], align 8 -// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS29]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP97]], align 8 -// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 -// CHECK-NEXT: store ptr null, ptr [[TMP98]], align 8 -// CHECK-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP99]], align 8 -// CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP100]], align 8 -// CHECK-NEXT: [[TMP101:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]]) -// CHECK-NEXT: [[TMP102:%.*]] = icmp ne i32 [[TMP101]], 0 -// CHECK-NEXT: br i1 [[TMP102]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] +// CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP90]], align 4 +// CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 +// CHECK-NEXT: store i32 1, ptr [[TMP91]], align 4 +// CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP88]], ptr [[TMP92]], align 8 +// CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 3 +// CHECK-NEXT: store ptr [[TMP89]], ptr [[TMP93]], align 8 +// CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.13, ptr [[TMP94]], align 8 +// CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.14, ptr [[TMP95]], align 8 +// CHECK-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP96]], align 8 +// CHECK-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 7 +// CHECK-NEXT: store ptr null, ptr [[TMP97]], align 8 +// CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP98]], align 8 +// CHECK-NEXT: [[TMP99:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155.region_id, ptr [[KERNEL_ARGS29]]) +// CHECK-NEXT: [[TMP100:%.*]] = icmp ne i32 [[TMP99]], 0 +// CHECK-NEXT: br i1 [[TMP100]], label [[OMP_OFFLOAD_FAILED30:%.*]], label [[OMP_OFFLOAD_CONT31:%.*]] // CHECK: omp_offload.failed30: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l155(ptr [[DA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT31]] // CHECK: omp_offload.cont31: -// CHECK-NEXT: [[TMP103:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP103]]) +// CHECK-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_S0_(i32 noundef signext [[TMP101]]) // CHECK-NEXT: [[CALL32:%.*]] = call noundef ptr @_Z5tmainIPiET_S1_(ptr noundef [[ARGC_ADDR]]) -// CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[CALL32]], align 4 -// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP104]] +// CHECK-NEXT: [[TMP102:%.*]] = load i32, ptr [[CALL32]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[TMP102]] // CHECK-NEXT: ret i32 [[ADD]] // // @@ 
-681,104 +681,104 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.17, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.18, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l125(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr 
[[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] 
= getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.19, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.20, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: 
[[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l127(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8 -// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 -// 
CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS16]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP61]], align 8 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.21, ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.22, ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]]) -// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// 
CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129.region_id, ptr [[KERNEL_ARGS16]]) +// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0 +// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] // CHECK: omp_offload.failed17: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_S0__l129(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]] @@ -855,104 +855,104 @@ // CHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[Z]], align 8 // CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP]], align 8 // CHECK-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP20]], align 8 -// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP21]], align 8 -// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP24]], align 4 +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP26]], align 4 -// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8 -// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP24]], ptr [[TMP28]], align 8 -// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK-NEXT: store ptr 
@.offload_sizes.25, ptr [[TMP29]], align 8 -// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP30]], align 8 -// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.25, ptr [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.26, ptr [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP33]], align 8 -// CHECK-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr 
[[TMP32]], align 8 +// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK: omp_offload.failed5: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l125(ptr [[TMP18]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT6]] // CHECK: omp_offload.cont6: -// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[AA]], ptr [[TMP35]], align 8 +// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: store ptr [[AA]], ptr [[TMP36]], align 8 -// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[AA]], ptr [[TMP37]], align 8 -// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 -// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP37]], align 8 +// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS10:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP40]], align 4 +// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP41]], align 4 -// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP42]], align 4 -// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8 +// CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP39]], ptr [[TMP43]], align 8 -// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP40]], ptr [[TMP44]], align 8 -// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP45]], align 8 -// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP46]], align 8 -// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS10]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.27, ptr [[TMP44]], align 8 +// CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.28, ptr [[TMP45]], align 8 +// CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP46]], align 8 +// CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP47]], align 8 -// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP49]], align 8 -// CHECK-NEXT: [[TMP50:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]]) -// CHECK-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK-NEXT: br i1 [[TMP51]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP48]], align 8 +// CHECK-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127.region_id, ptr [[KERNEL_ARGS10]]) +// CHECK-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] // CHECK: omp_offload.failed11: // CHECK-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l127(ptr [[AA]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT12]] // CHECK: omp_offload.cont12: -// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: store ptr [[H]], ptr [[TMP51]], align 8 +// CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: store ptr [[H]], ptr [[TMP52]], align 8 -// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 -// CHECK-NEXT: store ptr [[H]], ptr [[TMP53]], align 8 -// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 -// CHECK-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 -// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CHECK-NEXT: [[TMP53:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CHECK-NEXT: store ptr null, ptr [[TMP53]], align 8 +// CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr [[TMP56]], align 4 +// CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK-NEXT: store i32 1, ptr [[TMP57]], align 4 -// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 -// CHECK-NEXT: store i32 1, ptr [[TMP58]], align 4 -// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CHECK-NEXT: store ptr [[TMP54]], ptr [[TMP58]], align 8 +// CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 // CHECK-NEXT: store ptr [[TMP55]], ptr [[TMP59]], align 8 -// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 -// CHECK-NEXT: store ptr [[TMP56]], ptr [[TMP60]], align 8 -// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 -// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP61]], align 8 -// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 -// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP62]], align 8 -// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CHECK-NEXT: store ptr @.offload_sizes.29, ptr [[TMP60]], align 8 +// CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CHECK-NEXT: store ptr @.offload_maptypes.30, ptr [[TMP61]], align 8 +// CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS16]], i32 0, i32 6 +// CHECK-NEXT: store ptr null, ptr [[TMP62]], align 8 +// CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 // CHECK-NEXT: store ptr null, ptr [[TMP63]], align 8 -// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 -// CHECK-NEXT: store ptr null, ptr [[TMP64]], align 8 -// CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 -// CHECK-NEXT: store i64 0, ptr [[TMP65]], align 8 -// CHECK-NEXT: [[TMP66:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]]) -// CHECK-NEXT: [[TMP67:%.*]] = icmp ne i32 [[TMP66]], 0 -// CHECK-NEXT: br i1 [[TMP67]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CHECK-NEXT: store i64 0, ptr [[TMP64]], align 8 +// CHECK-NEXT: [[TMP65:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129.region_id, ptr [[KERNEL_ARGS16]]) +// CHECK-NEXT: [[TMP66:%.*]] = icmp ne i32 [[TMP65]], 0 +// CHECK-NEXT: br i1 [[TMP66]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] // CHECK: omp_offload.failed17: // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPiET_S1__l129(ptr [[H]]) #[[ATTR5]] // CHECK-NEXT: br label [[OMP_OFFLOAD_CONT18]] diff --git a/clang/test/OpenMP/target_in_reduction_codegen.cpp b/clang/test/OpenMP/target_in_reduction_codegen.cpp --- a/clang/test/OpenMP/target_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_in_reduction_codegen.cpp @@ -49,6 +49,7 @@ // CHECK1-NEXT: 
[[DOTTASK_RED_:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: %omp.outlined.arg.agg. = alloca %struct.anon // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -168,7 +169,15 @@ // CHECK1-NEXT: [[TMP57:%.*]] = bitcast [2 x %struct.kmp_taskred_input_t.0]* [[DOTRD_INPUT_3]] to i8* // CHECK1-NEXT: [[TMP58:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 2, i8* [[TMP57]]) // CHECK1-NEXT: store i8* [[TMP58]], i8** [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[A]], i64 [[TMP2]], i16* [[VLA]], i8** [[DOTTASK_RED_]]) +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 0 +// CHECK1-NEXT: store i32* %a, i32** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 1 +// CHECK1-NEXT: store i64 %2, i64* [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 2 +// CHECK1-NEXT: store i16* %vla, i16** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* %omp.outlined.arg.agg., i32 0, i32 3 +// CHECK1-NEXT: store i8** %.task_red., i8*** [[TMP62]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* %omp.outlined.arg.agg.) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 @@ -462,52 +471,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], i16* nonnull align 2 dereferenceable(2) [[D:%.*]], i8** nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.anon* noalias [[CONTEXT_:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[CONTEXT_ADDR_:%.*]] = alloca %struct.anon*, align 8 // CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[D]], i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: store i8** [[DOTTASK_RED_]], i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 
8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16*, i16** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store i16* [[TMP2]], i16** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 1, i64 48, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to %struct.kmp_task_t_with_privates* -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP14]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP16]], i8* align 8 [[TMP17]], i64 32, i1 false) -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP13]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP16]] to %struct.anon* -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP18]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP3]], align 8 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]]) -// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP11]], %struct.kmp_task_t_with_privates* [[TMP13]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i8* [[TMP12]]) +// CHECK1-NEXT: store %struct.anon* [[CONTEXT_]], %struct.anon** [[CONTEXT_ADDR_]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[CONTEXT_ADDR_]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16*, i16** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds %struct.anon, %struct.anon* [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8**, i8*** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 0 +// CHECK1-NEXT: store i32* [[TMP2]], i32** [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 2 +// CHECK1-NEXT: store i16* [[TMP6]], i16** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds %struct.anon.1, %struct.anon.1* %agg.captured, i32 0, i32 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP16]], i32 1, i64 48, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to %struct.kmp_task_t_with_privates* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 32, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP21]] to %struct.anon.1* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP23]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** %8, align 8 +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(%struct.ident_t* @[[GLOB1]], i32 %16, i8* [[TMP17]]) +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @.omp_task_entry.(i32 %16, %struct.kmp_task_t_with_privates* [[TMP18]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(%struct.ident_t* @[[GLOB1]], i32 %16, i8* [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -573,7 +581,7 @@ // CHECK1-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 // CHECK1-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 // CHECK1-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTADDR:%.*]] 
= alloca i32, align 4 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 @@ -585,7 +593,7 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* @@ -598,25 +606,25 @@ // CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon.1* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 // CHECK1-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast void (i8*, ...)* [[TMP15]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP17]](i8* [[TMP16]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3]] // CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP19]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP18]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP20]] to i8* // CHECK1-NEXT: [[TMP24:%.*]] = call i8* 
@__kmpc_task_reduction_get_th_data(i32 [[TMP22]], i8* [[TMP21]], i8* [[TMP23]]) // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon.1* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP28:%.*]] = load i16*, i16** [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i8*, i8** [[TMP18]], align 8 // CHECK1-NEXT: call void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(i32* [[TMP26]], i64 [[TMP14]], i16* [[TMP28]], i8* [[TMP29]]) #[[ATTR3]] diff --git a/clang/test/OpenMP/target_map_codegen_03.cpp b/clang/test/OpenMP/target_map_codegen_03.cpp --- a/clang/test/OpenMP/target_map_codegen_03.cpp +++ b/clang/test/OpenMP/target_map_codegen_03.cpp @@ -62,34 +62,39 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[I_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[I_CASTED]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[I_CASTED]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 // CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -117,7 +122,7 @@ // CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i64 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret void @@ -127,24 +132,29 @@ // CHECK1-SAME: (i64 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// 
CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -160,34 +170,39 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[I]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[I]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 // CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -215,7 +230,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP2]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z28implicit_maps_nested_integeri_l48(i32 [[TMP4]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret void @@ -225,24 +240,29 @@ // CHECK3-SAME: (i32 noundef [[I:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[I_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// 
CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_map_codegen_hold.cpp b/clang/test/OpenMP/target_map_codegen_hold.cpp --- a/clang/test/OpenMP/target_map_codegen_hold.cpp +++ b/clang/test/OpenMP/target_map_codegen_hold.cpp @@ -83,33 +83,81 @@ // MEMBER_OF_5 = 0x5000000000000 //. +// CHECK-USE-PPC64LE: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] -// CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]] +// CHECK-USE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475] +// CHECK-USE-PPC64LE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-USE-PPC64LE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK-USE-PPC64LE: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]] +// CHECK-USE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255] +// CHECK-USE-PPC64LE: @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id = weak constant i8 0 // CHECK-USE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] -// CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr 
constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]] +// CHECK-USE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851] +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name = internal unnamed_addr constant [61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name.5, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115\00" +// CHECK-USE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id, i8* getelementptr inbounds ([67 x i8], [67 x i8]* @.omp_offloading.entry_name.6, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-PPC64LE: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ 
i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }] //. +// CHECK-USE-I386: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] -// CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2023]], i64 [[#0x2020]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]] +// CHECK-USE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8224, i64 281474976718851, i64 281474976718851, i64 8227, i64 8224, i64 1407374883561475, i64 1407374883561475] +// CHECK-USE-I386: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-USE-I386: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK-USE-I386: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2427]]] +// CHECK-USE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9255] +// CHECK-USE-I386: @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id = weak constant i8 0 // CHECK-USE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] -// CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2020]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]] +// CHECK-USE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8224, i64 281474976718851, i64 281474976718851] +// CHECK-USE-I386: @.omp_offloading.entry_name = internal unnamed_addr constant 
[61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name.5, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115\00" +// CHECK-USE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id, i8* getelementptr inbounds ([67 x i8], [67 x i8]* @.omp_offloading.entry_name.6, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-USE-I386: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }] //. 
+// CHECK-NOUSE-PPC64LE: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] -// CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]] +// CHECK-NOUSE-PPC64LE: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475] +// CHECK-NOUSE-PPC64LE: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-NOUSE-PPC64LE: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK-NOUSE-PPC64LE: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]] +// CHECK-NOUSE-PPC64LE: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223] +// CHECK-NOUSE-PPC64LE: @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id = weak constant i8 0 // CHECK-NOUSE-PPC64LE: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] -// CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]] +// CHECK-NOUSE-PPC64LE: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851] +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name = internal unnamed_addr constant [61 x i8] 
c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name.5, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115\00" +// CHECK-NOUSE-PPC64LE: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id, i8* getelementptr inbounds ([67 x i8], [67 x i8]* @.omp_offloading.entry_name.6, i32 0, i32 0), i64 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-PPC64LE: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }] //. 
+// CHECK-NOUSE-I386: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id = weak constant i8 0 // CHECK-NOUSE-I386: @.offload_sizes = private unnamed_addr constant [7 x i64] [i64 0, i64 4, i64 4, i64 4, i64 0, i64 4, i64 4] -// CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]], i64 [[#0x2003]], i64 [[#0x2000]], i64 [[#0x5000000002003]], i64 [[#0x5000000002003]]] +// CHECK-NOUSE-I386: @.offload_maptypes = private unnamed_addr constant [7 x i64] [i64 8192, i64 281474976718851, i64 281474976718851, i64 8195, i64 8192, i64 1407374883561475, i64 1407374883561475] +// CHECK-NOUSE-I386: @0 = private unnamed_addr constant [23 x i8] c" +// CHECK-NOUSE-I386: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +// CHECK-NOUSE-I386: @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id = weak constant i8 0 // CHECK-NOUSE-I386: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 4] -// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 [[#0x2407]]] +// CHECK-NOUSE-I386: @.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 9223] +// CHECK-NOUSE-I386: @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id = weak constant i8 0 // CHECK-NOUSE-I386: @.offload_sizes.3 = private unnamed_addr constant [3 x i64] [i64 0, i64 4, i64 4] -// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 [[#0x2000]], i64 [[#0x1000000002003]], i64 [[#0x1000000002003]]] +// CHECK-NOUSE-I386: @.offload_maptypes.4 = private unnamed_addr constant [3 x i64] [i64 8192, i64 281474976718851, i64 281474976718851] +// CHECK-NOUSE-I386: @.omp_offloading.entry_name = internal unnamed_addr constant [61 x i8] 
c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782\00" +// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l782.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-I386: @.omp_offloading.entry_name.5 = internal unnamed_addr constant [61 x i8] c"__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796\00" +// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__Z20explicit_maps_singlei_l796.region_id, i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.omp_offloading.entry_name.5, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-I386: @.omp_offloading.entry_name.6 = internal unnamed_addr constant [67 x i8] c"__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115\00" +// CHECK-NOUSE-I386: @.omp_offloading.entry.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115 = weak constant %struct.__tgt_offload_entry { i8* @.__omp_offloading_fd00_2691c32__ZN2ST20test_present_membersEv_l1115.region_id, i8* getelementptr inbounds ([67 x i8], [67 x i8]* @.omp_offloading.entry_name.6, i32 0, i32 0), i32 0, i32 0, i32 0 }, section "omp_offloading_entries", align 1 +// CHECK-NOUSE-I386: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* @.omp_offloading.requires_reg, i8* null }] //. struct ST { int i; @@ -1122,3 +1170,58 @@ } #endif +//. 
+// CHECK-USE-PPC64LE: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +// CHECK-USE-PPC64LE: attributes #1 = { noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +// CHECK-USE-PPC64LE: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK-USE-PPC64LE: attributes #3 = { nounwind } +// CHECK-USE-PPC64LE: attributes #4 = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +//. 
+// CHECK-USE-I386: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// CHECK-USE-I386: attributes #1 = { noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// CHECK-USE-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK-USE-I386: attributes #3 = { nounwind } +// CHECK-USE-I386: attributes #4 = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +//. +// CHECK-NOUSE-PPC64LE: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +// CHECK-NOUSE-PPC64LE: attributes #1 = { noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +// CHECK-NOUSE-PPC64LE: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK-NOUSE-PPC64LE: attributes #3 = { nounwind } +// CHECK-NOUSE-PPC64LE: attributes #4 = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="-altivec,-bpermd,-crbits,-crypto,-direct-move,-extdiv,-htm,-isa-v206-instructions,-isa-v207-instructions,-isa-v30-instructions,-power8-vector,-power9-vector,-privileged,-quadword-atomics,-rop-protect,-spe,-vsx" } +//. +// CHECK-NOUSE-I386: attributes #0 = { mustprogress noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// CHECK-NOUSE-I386: attributes #1 = { noinline norecurse nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +// CHECK-NOUSE-I386: attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +// CHECK-NOUSE-I386: attributes #3 = { nounwind } +// CHECK-NOUSE-I386: attributes #4 = { noinline nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+x87" } +//. +// CHECK-USE-PPC64LE: !0 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 782, i32 0, i32 0} +// CHECK-USE-PPC64LE: !1 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 796, i32 0, i32 1} +// CHECK-USE-PPC64LE: !2 = !{i32 0, i32 64768, i32 40442930, !"_ZN2ST20test_present_membersEv", i32 1115, i32 0, i32 2} +// CHECK-USE-PPC64LE: !3 = !{i32 1, !"wchar_size", i32 4} +// CHECK-USE-PPC64LE: !4 = !{i32 7, !"openmp", i32 50} +// CHECK-USE-PPC64LE: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
+// CHECK-USE-I386: !0 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 782, i32 0, i32 0} +// CHECK-USE-I386: !1 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 796, i32 0, i32 1} +// CHECK-USE-I386: !2 = !{i32 0, i32 64768, i32 40442930, !"_ZN2ST20test_present_membersEv", i32 1115, i32 0, i32 2} +// CHECK-USE-I386: !3 = !{i32 1, !"NumRegisterParameters", i32 0} +// CHECK-USE-I386: !4 = !{i32 1, !"wchar_size", i32 4} +// CHECK-USE-I386: !5 = !{i32 7, !"openmp", i32 50} +// CHECK-USE-I386: !6 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-NOUSE-PPC64LE: !0 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 782, i32 0, i32 0} +// CHECK-NOUSE-PPC64LE: !1 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 796, i32 0, i32 1} +// CHECK-NOUSE-PPC64LE: !2 = !{i32 0, i32 64768, i32 40442930, !"_ZN2ST20test_present_membersEv", i32 1115, i32 0, i32 2} +// CHECK-NOUSE-PPC64LE: !3 = !{i32 1, !"wchar_size", i32 4} +// CHECK-NOUSE-PPC64LE: !4 = !{i32 7, !"openmp", i32 50} +// CHECK-NOUSE-PPC64LE: !5 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. +// CHECK-NOUSE-I386: !0 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 782, i32 0, i32 0} +// CHECK-NOUSE-I386: !1 = !{i32 0, i32 64768, i32 40442930, !"_Z20explicit_maps_singlei", i32 796, i32 0, i32 1} +// CHECK-NOUSE-I386: !2 = !{i32 0, i32 64768, i32 40442930, !"_ZN2ST20test_present_membersEv", i32 1115, i32 0, i32 2} +// CHECK-NOUSE-I386: !3 = !{i32 1, !"NumRegisterParameters", i32 0} +// CHECK-NOUSE-I386: !4 = !{i32 1, !"wchar_size", i32 4} +// CHECK-NOUSE-I386: !5 = !{i32 7, !"openmp", i32 50} +// CHECK-NOUSE-I386: !6 = !{!"clang version 16.0.0 (https://github.com/llvm/llvm-project.git 45ce38ddb9e220051de6a7ba563a3b72f1e93cea)"} +//. 
diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -303,7 +303,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 @@ -545,17 +545,21 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -621,27 +625,32 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -649,36 +658,41 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK1-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK1: .cancel.continue: // CHECK1-NEXT: ret void @@ -689,39 +703,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -737,7 +757,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -755,82 +775,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr 
[[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double 
[[ADD5]] to float -// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 
[[ADD19]] to i8 -// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1161,7 +1194,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1171,48 +1204,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = 
sitofp i32 [[TMP4]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 
[[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1223,59 +1265,67 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1286,47 +1336,54 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1350,7 +1407,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 @@ -1592,17 +1649,21 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1668,27 +1729,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1696,36 +1762,41 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK3-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK3: .cancel.continue: // CHECK3-NEXT: ret void @@ -1736,39 +1807,45 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -1784,7 +1861,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1802,82 +1879,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr 
[[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double 
[[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 
[[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2208,7 +2298,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2218,48 +2308,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = 
sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 
[[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -2270,59 +2369,67 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -2333,47 +2440,54 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -2388,17 +2502,21 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2406,36 +2524,41 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK9-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK9-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: -// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK9-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK9: .cancel.continue: // CHECK9-NEXT: ret void @@ -2446,39 +2569,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr 
[[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -2494,7 +2623,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2512,82 +2641,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 
+// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK9-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK9-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, ptr 
[[Y]], align 8 -// CHECK9-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK9-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK9-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -2598,59 +2740,67 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
[[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -2663,7 +2813,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2673,48 +2823,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp 
i32 [[TMP4]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP6]] -// 
CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -2724,47 +2883,54 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -2772,17 +2938,21 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -2790,36 +2960,41 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: 
[[TMP3:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP2]], i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK11-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB1]], i32 [[TMP5]], i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i32 [[TMP6]], 0 +// CHECK11-NEXT: br i1 [[TMP7]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: -// CHECK11-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]]) // CHECK11-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK11: .cancel.continue: // CHECK11-NEXT: ret void @@ -2830,39 +3005,45 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: 
store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -2878,7 +3059,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2896,82 +3077,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], 
align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr 
[[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 
[[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// 
CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: 
store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = 
trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -2982,59 +3176,67 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // @@ -3047,7 +3249,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3057,48 +3259,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -3108,47 +3319,54 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -74,8 +74,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -100,184 +100,138 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG47]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG48:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG48]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG48]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG49:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG48:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG49:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG48]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP14]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP15:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP15]]), !dbg [[DBG48]] +// CHECK1-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP16]]), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP15]], i64 32), !dbg [[DBG48]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB5:[0-9]+]], i8 2, i1 true), !dbg [[DBG51:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG52:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG47]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG52:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG53:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr 
addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG70:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG70]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B3]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG70]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG76:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG76]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], 
metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG78]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 1, !dbg [[DBG81:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG81]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[DBG80]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG83]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG86:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG85]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG85]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG88]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG88]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG88]] -// 
CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG91]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG68:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +// CHECK1-NEXT: 
call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP6]], i64 400, i1 false), !dbg [[DBG68]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG74]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG76]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 1, !dbg [[DBG79:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG79]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG78]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG81]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG84:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG83]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] 
= getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG86]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG86]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG86]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG89]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG89]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG91]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG91]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG91]] -// 
CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG93]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG93]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP15]] to i64, !dbg [[DBG96]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG96]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG98]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG98]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP16]], !dbg [[DBG98]] -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG98]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG98]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG98]] -// CHECK1-NEXT: ret void, !dbg [[DBG99:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) 
#[[ATTR0]] !dbg [[DBG100:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG108]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG114:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG114]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG114]] -// CHECK1-NEXT: ret void, !dbg [[DBG114]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG94]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG94]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP16]] to i1, !dbg [[DBG96]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG96]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP15]], !dbg [[DBG96]] +// CHECK1-NEXT: [[TOBOOL21:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG96]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL21]] to i8, !dbg [[DBG96]] +// 
CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG96]] +// CHECK1-NEXT: ret void, !dbg [[DBG97:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG115:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG98:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META108:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG106]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG123:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG123]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG123]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP8]]) #[[ATTR3]], !dbg [[DBG123]] -// CHECK1-NEXT: ret void, !dbg [[DBG123]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG110]] +// 
CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG110]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG110]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP8]]) #[[ATTR4:[0-9]+]], !dbg [[DBG110]] +// CHECK1-NEXT: ret void, !dbg [[DBG110]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG124:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG111:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -286,203 +240,155 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG137:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG137]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG137]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG137]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG137]] -// CHECK1-NEXT: [[TMP9:%.*]] = 
call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG137]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG137]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG137]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG123:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG124:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG124]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG124]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB7:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG124]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG124]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG124]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[A_ADDR]], align 4, !dbg [[DBG138:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG138]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG138]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG139:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG125:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG126:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG125]] +// 
CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG125]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP14]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG125]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !dbg [[DBG125]] +// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP18]], ptr [[TMP17]], align 8, !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB9]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP17]]), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i64 32), !dbg [[DBG125]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB11:[0-9]+]], i8 2, i1 true), !dbg [[DBG128:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG129:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG137]] +// CHECK1-NEXT: ret void, !dbg [[DBG124]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG142:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG130:![0-9]+]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr 
[[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG156:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG156]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG156]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG160]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG160]] -// 
CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG159]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG162]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG164]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG167]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG169:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG170:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG169]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG169]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG171:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG172]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, 
!dbg [[DBG172]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG172]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG176:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG175]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG175]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG177]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG177]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG179:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG180]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG180]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG181:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG182:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 
dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG183:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META190:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG191]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG191]] -// CHECK1-NEXT: ret void, !dbg [[DBG191]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg 
[[DBG141]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG141]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG141]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG145]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG144]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG150]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG149]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG152:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG152]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG154:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG154]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG154]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG156:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG157:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG157]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG157]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG157]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG161:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG160]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG160]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG160]] +// CHECK1-NEXT: 
[[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG162]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG162]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG164:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG165]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG165]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG166:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG192:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG168:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], 
metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG198]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) 
[[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] -// CHECK1-NEXT: ret void, !dbg [[DBG198]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG174]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR4]], !dbg [[DBG174]] +// CHECK1-NEXT: ret void, !dbg [[DBG174]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG175:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] 
= alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -492,209 +398,156 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] 
= addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG188:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP1]], 
ptr [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG188]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB13:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG188]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG188]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG188]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP5]], 
ptr [[TMP16]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG213]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG189]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !dbg [[DBG189]] +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg 
[[DBG189]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP19]]), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 32), !dbg [[DBG189]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG192:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG212]] +// CHECK1-NEXT: ret void, !dbg [[DBG188]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG193:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // 
CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META222:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG230:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP4:%.*]] = 
addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG231]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG231]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG235]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG234]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 
0, i64 1, !dbg [[DBG240:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG240]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[DBG239]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG242]] -// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG243:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG244:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG245:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG244]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG244]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG246:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG247:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG247]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG247]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG247]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG249:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[TMP5]], align 4, !dbg [[DBG251:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG250]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG250]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG252:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG253:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG252]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG252]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG256:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG255]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG255]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG257]] -// CHECK1-NEXT: ret void, !dbg [[DBG258:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr 
noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG259:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META268:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] -// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG269:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG269]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG269]] -// CHECK1-NEXT: ret void, !dbg [[DBG269]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG204:![0-9]+]] +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG204]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG204]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META205:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG208:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX1]], i64 0, i64 1, !dbg [[DBG208]] +// CHECK1-NEXT: store ptr [[ARRAYIDX2]], ptr [[F]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[G]], align 8, !dbg [[DBG210]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg 
[[DBG213]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[H]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG215]] +// CHECK1-NEXT: store i32 5, ptr [[TMP4]], align 4, !dbg [[DBG216:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG217:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG217]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 [[IDXPROM]], !dbg [[DBG217]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX6]], align 4, !dbg [[DBG219:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG220:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX7]], i64 0, i64 0, !dbg [[DBG220]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG221:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG220]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM9]], !dbg [[DBG220]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX10]], align 4, !dbg [[DBG222:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG223:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX11]], i64 0, i64 0, !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG224:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG223]] +// CHECK1-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX12]], i64 0, i64 [[IDXPROM13]], !dbg [[DBG223]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !dbg [[DBG223]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG225:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG226:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG225]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG225]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX17]], align 4, !dbg [[DBG227:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG228]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG230:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG230]] +// CHECK1-NEXT: ret void, !dbg [[DBG231:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG270:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 
dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG232:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236:![0-9]+]] // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG278]] -// 
CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG278]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG278]] -// CHECK1-NEXT: ret void, !dbg [[DBG278]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG240]] +// CHECK1-NEXT: 
[[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG240]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG240]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR4]], !dbg [[DBG240]] +// CHECK1-NEXT: ret void, !dbg [[DBG240]] // diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -334,7 +334,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED3:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8 @@ -427,13 +427,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP40:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP40]], ptr [[TMP39]], align 4 -// CHECK1-NEXT: 
[[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK1-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[A]], align 4 // CHECK1-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -630,15 +630,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -648,46 +650,48 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK1: .cancel.exit2: // CHECK1-NEXT: br label [[CANCEL_EXIT]] // CHECK1: .cancel.continue3: @@ -695,19 +699,19 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: cancel.cont: // CHECK1-NEXT: ret void // CHECK1: cancel.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[CANCEL_CONT]] // // @@ -716,27 +720,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -748,52 +752,58 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 
[[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -801,12 +811,12 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // 
CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -818,33 +828,32 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -859,88 +868,96 @@ // CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], ptr 
[[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // 
CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -1021,8 +1038,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !24 @@ -1064,27 +1081,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1094,60 +1111,66 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: 
store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 
4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1164,8 +1187,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1184,31 +1206,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1218,122 +1249,128 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr 
[[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: 
[[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 
1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], 
align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1664,7 +1701,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1674,23 +1711,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1700,72 +1742,76 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 
-// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 
[[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1776,45 +1822,53 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1824,30 +1878,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -1857,65 +1912,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: 
[[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1953,7 +2014,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 @@ -2041,13 +2102,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP36:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP36]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 -// 
CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[A]], align 4 // CHECK3-NEXT: store i32 [[TMP40]], ptr [[TMP39]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2246,15 +2307,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2264,46 +2327,48 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK3: .cancel.exit2: // CHECK3-NEXT: br label [[CANCEL_EXIT]] // CHECK3: .cancel.continue3: @@ -2311,19 +2376,19 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: cancel.cont: // CHECK3-NEXT: ret void // CHECK3: cancel.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[CANCEL_CONT]] // // @@ -2332,24 +2397,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2358,69 +2425,73 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // 
CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], 
[[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // 
CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP19]], ptr [[TMP4]], align 8 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2432,33 +2503,32 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2473,88 +2543,96 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr 
[[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK3-NEXT: store i32 [[CONV10]], 
ptr [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2635,8 +2713,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !25 @@ -2678,27 +2756,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], 
align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2708,60 +2786,66 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // 
CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2778,8 +2862,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2798,31 +2881,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2832,122 +2924,128 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// 
CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = fpext 
float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: 
[[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr 
[[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3278,7 +3376,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3288,23 +3386,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3314,72 +3417,76 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 
-// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 
[[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3390,45 +3497,53 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3438,30 +3553,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -3471,65 +3587,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: 
[[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: 
[[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3543,15 +3665,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK9-SAME: () #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3561,46 +3685,48 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw 
i32 3, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: // CHECK9-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK9: .cancel.continue: -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK9-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK9: .cancel.exit2: // CHECK9-NEXT: br label [[CANCEL_EXIT]] // CHECK9: .cancel.continue3: @@ -3608,19 +3734,19 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK9: cancel.cont: // CHECK9-NEXT: ret void // CHECK9: cancel.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[CANCEL_CONT]] // // @@ -3630,33 +3756,32 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -3671,88 +3796,96 @@ // CHECK9-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK9-NEXT: store i64 
[[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] 
= load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK9-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK9-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] 
to i32 // CHECK9-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK9-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK9-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK9-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK9-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK9-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK9-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK9-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK9-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -3769,27 +3902,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: 
[[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3799,60 +3932,66 @@ // CHECK9-NEXT: [[IT:%.*]] = 
alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK9-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -3869,8 +4008,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3889,31 +4027,40 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3923,122 +4070,128 @@ // CHECK9-NEXT: [[IT:%.*]] = 
alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK9-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK9-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK9-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK9-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, 
ptr [[ARRAYIDX18]], align 8 -// CHECK9-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK9-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK9-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 
[[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// 
CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4049,45 +4202,53 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: ret void // // @@ -4099,7 +4260,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4109,23 +4270,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4135,72 +4301,76 @@ // CHECK9-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 
-// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK9-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK9-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 
[[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK9-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK9-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4210,30 +4380,31 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// 
CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4243,80 +4414,88 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK9-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr 
[[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 
-// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK9-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK11-SAME: () #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4326,46 +4505,48 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP8]], 5 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK11-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: // CHECK11-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK11: .cancel.continue: -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK11-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK11: .cancel.exit2: // CHECK11-NEXT: br label [[CANCEL_EXIT]] // CHECK11: .cancel.continue3: @@ -4373,19 +4554,19 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK11: cancel.cont: // CHECK11-NEXT: ret void // CHECK11: cancel.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[CANCEL_CONT]] // // @@ -4395,33 +4576,32 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -4436,88 +4616,96 @@ // CHECK11-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK11-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() 
#[[ATTR5:[0-9]+]] // CHECK11-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: 
store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] 
// CHECK11-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK11-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK11-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK11-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK11-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK11-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK11-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK11-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK11-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK11-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK11-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK11-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK11-NEXT: store i64 
[[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: // CHECK11-NEXT: ret void @@ -4534,27 +4722,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, 
ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4564,60 +4752,66 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], 
align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK11-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: 
store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4634,8 +4828,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4654,31 +4847,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4688,122 +4890,128 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK11-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK11-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK11-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// 
CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] 
= mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK11-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: 
[[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -4814,45 +5022,53 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: ret void // // @@ -4864,7 +5080,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4874,23 +5090,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4900,72 +5121,76 @@ // CHECK11-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK11-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK11-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK11-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK11-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK11-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: 
[[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK11-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK11-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -4975,30 +5200,31 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -5008,65 +5234,71 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK11-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: 
cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], 
ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK11-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // // @@ -5098,7 +5330,7 @@ // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK17-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK17-NEXT: 
[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK17-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: [[A_CASTED3:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 8 @@ -5191,13 +5423,13 @@ // CHECK17-NEXT: store ptr null, ptr [[TMP36]], align 8 // CHECK17-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK17-NEXT: [[TMP40:%.*]] = load i16, ptr [[AA]], align 2 // CHECK17-NEXT: store i16 [[TMP40]], ptr [[TMP39]], align 4 -// CHECK17-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK17-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4 -// CHECK17-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[A]], align 4 // CHECK17-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4 // CHECK17-NEXT: [[TMP45:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -5394,15 +5626,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK17-SAME: () #[[ATTR2:[0-9]+]] { // CHECK17-NEXT: entry: -// 
CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5412,46 +5646,48 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK17-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK17-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK17: .cancel.exit: // CHECK17-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK17: .cancel.continue: -// CHECK17-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK17-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK17: .cancel.exit2: // CHECK17-NEXT: br label [[CANCEL_EXIT]] // CHECK17: .cancel.continue3: @@ -5459,19 +5695,19 @@ // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK17: cancel.cont: // CHECK17-NEXT: ret void // CHECK17: cancel.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[CANCEL_CONT]] // // @@ -5480,27 +5716,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK17-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5512,52 +5748,58 @@ // CHECK17-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store 
i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// 
CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: 
[[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK17-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP12]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -5565,12 +5807,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK17-NEXT: store i64 [[TMP14]], ptr [[K_ADDR]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[K]], align 8 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5582,33 +5824,32 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// 
CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -5623,88 +5864,96 @@ // CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 
// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK17-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK17-NEXT: [[ADD9:%.*]] = add i64 
[[CONV7]], [[MUL8]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK17-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK17-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[LIN2]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -5785,8 +6034,8 @@ // CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK17-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !24 // CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !24 @@ -5828,27 +6077,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // 
CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5858,60 +6107,66 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], 
align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: 
store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // @@ -5928,8 +6183,7 @@ // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5948,31 +6202,40 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// 
CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5982,122 +6245,128 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr 
[[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// 
CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], ptr [[X]], align 8 -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8 +// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] 
= mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8 +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], ptr [[X]], align 8 +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: 
[[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK17-NEXT: ret void // // @@ -6428,7 +6697,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -6438,23 +6707,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6464,72 +6738,76 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8 -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A4]], align 8 -// CHECK17-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A2]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: 
[[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK17-NEXT: ret void // // @@ -6540,45 +6818,53 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -6588,30 +6874,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // 
CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6621,65 +6908,71 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 
[[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: 
-// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: 
[[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK17-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // @@ -6717,7 +7010,7 @@ // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK19-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK19-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: [[A_CASTED3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [2 x ptr], align 4 @@ -6805,13 +7098,13 @@ // CHECK19-NEXT: store ptr null, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP34:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK19-NEXT: [[TMP36:%.*]] = load i16, ptr [[AA]], align 2 // CHECK19-NEXT: store i16 [[TMP36]], ptr [[TMP35]], align 4 -// CHECK19-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[AGG_CAPTURED]], i32 0, i32 1 // CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[LIN]], align 4 // CHECK19-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 -// CHECK19-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[A]], align 4 // CHECK19-NEXT: store i32 [[TMP40]], ptr [[TMP39]], align 4 // CHECK19-NEXT: [[TMP41:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -7010,15 +7303,17 @@ // CHECK19-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103 // CHECK19-SAME: () #[[ATTR2:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7028,46 +7323,48 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK19-NEXT: br i1 [[TMP9]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK19-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK19: .cancel.exit: // CHECK19-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK19: .cancel.continue: -// CHECK19-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP1]], i32 2) -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] +// CHECK19-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancellationpoint(ptr @[[GLOB2]], i32 [[TMP2]], i32 2) +// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 +// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT2:%.*]], label [[DOTCANCEL_CONTINUE3:%.*]] // CHECK19: .cancel.exit2: // CHECK19-NEXT: br label [[CANCEL_EXIT]] // CHECK19: .cancel.continue3: @@ -7075,19 +7372,19 @@ // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK19: cancel.cont: // CHECK19-NEXT: ret void // CHECK19: cancel.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[CANCEL_CONT]] // // @@ -7096,24 +7393,26 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -7122,69 +7421,73 @@ // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK19-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK19-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK19-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// 
CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK19-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK19-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK19: omp.inner.for.end: // 
CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK19-NEXT: store i64 [[TMP15]], ptr [[TMP0]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[K]], align 8 +// CHECK19-NEXT: store i64 [[TMP19]], ptr [[TMP4]], align 8 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7196,33 +7499,32 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// 
CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -7237,88 +7539,96 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 
// CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK19-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 -// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 -// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK19-NEXT: [[ADD9:%.*]] = add i64 
[[CONV7]], [[MUL8]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK19-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK19-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP21]], ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -7399,8 +7709,8 @@ // CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 // CHECK19-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP18]], align 4, !noalias !25 @@ -7442,27 +7752,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // 
CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7472,60 +7782,66 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], 
align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK19-NEXT: store i16 [[CONV]], ptr [[IT]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: 
store i16 [[CONV5]], ptr [[AA]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // @@ -7542,8 +7858,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7562,31 +7877,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// 
CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7596,122 +7920,128 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], ptr [[IT]], align 1 -// CHECK19-NEXT: [[TMP19:%.*]] = 
load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// 
CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8 -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], ptr [[X]], align 4 -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4 +// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK19-NEXT: [[TMP36:%.*]] 
= mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4 +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK19-NEXT: store i64 [[ADD16]], ptr [[X]], align 4 +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: 
[[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK19-NEXT: ret void // // @@ -8042,7 +8372,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -8052,23 +8382,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8078,72 +8413,76 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A4]], align 4 -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2 +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A2]], align 4 +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: 
[[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK19-NEXT: ret void // // @@ -8154,45 +8493,53 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -8202,30 +8549,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // 
CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -8235,65 +8583,71 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 
[[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: 
-// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: 
[[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK19-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -65,8 +65,8 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -94,232 +94,186 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG41]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG42:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg 
[[DBG42]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP16]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG42]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP18]], align 8, !dbg [[DBG42]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG43:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP19]] to i1, !dbg [[DBG43]] -// CHECK1-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP20]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG42]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2, i1 true), !dbg [[DBG45:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG42:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP10]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], 
align 4, !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP13]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG42]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP14]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP15:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP15]]), !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: store [[STRUCT_ANON]] [[TMP17]], ptr [[TMP16]], align 8, !dbg [[DBG42]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG45:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP18]] to i1, !dbg [[DBG45]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP19]], i32 -1, i32 -1, ptr @__omp_outlined__, ptr null, ptr [[TMP16]]), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP15]], i64 32), !dbg [[DBG42]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB8:[0-9]+]], i8 2, i1 true), !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG47:![0-9]+]] // CHECK1: worker.exit: // CHECK1-NEXT: ret void, !dbg [[DBG41]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef 
[[BB:%.*]]) #[[ATTR0]] !dbg [[DBG47:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG48:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x [10 x i32]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG65]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg 
[[DBG65]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG65]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg 
[[DBG74:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG63:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg 
[[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 [[TMP6]], i64 400, i1 false), !dbg [[DBG63]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG63]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG63]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG63]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 9, !dbg [[DBG68]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG66]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG66]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG68]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg 
[[DBG66]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG68]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG66]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ], !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG72:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG65]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG72]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1, !dbg [[DBG75:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG75]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG75]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG80:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG80]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META83:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG85:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG85]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG87:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG87]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG89]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG89]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG91:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG92]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG92]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG92]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = 
getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG95]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG95]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG97]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG97]] -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP25]] to i64, !dbg [[DBG100]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG100]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP27]] to i1, !dbg [[DBG102]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG102]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP26]], !dbg [[DBG102]] -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG102]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG102]] 
-// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG102]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG73:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG73]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG73]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG78:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG78]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG80]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 1, !dbg [[DBG83:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG83]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG85]] +// CHECK1-NEXT: store i32 5, ptr [[A]], 
align 4, !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG87]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG87]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG90:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG90]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG91:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG90]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG90]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG93]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG93]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG95:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load 
i32, ptr [[A]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG95]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG95]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B]], i64 0, i64 0, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG98]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG98]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP26]] to i1, !dbg [[DBG100]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG100]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP25]], !dbg [[DBG100]] +// CHECK1-NEXT: [[TOBOOL23:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG100]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL23]] to i8, !dbg [[DBG100]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG100]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG101:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP28]], 1, !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG74]], 
!llvm.loop [[LOOP104:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1, !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG74]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG63]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG68]] -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP31]], [[TMP32]], !dbg [[DBG65]] -// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG65]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG74]], !llvm.loop [[LOOP106:![0-9]+]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG72]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG66]] +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG72]] +// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4, 
!dbg [[DBG72]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG63]], !llvm.loop [[LOOP104:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP11]]), !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG107:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG108:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG116]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG122]] -// CHECK1-NEXT: call void @__omp_outlined___debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG122]] -// CHECK1-NEXT: ret void, !dbg [[DBG122]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG105:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 
dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR7:[0-9]+]] !dbg [[DBG106:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -327,32 +281,32 @@ // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] 
+// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG132]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG132]] -// CHECK1-NEXT: ret void, !dbg [[DBG132]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG119:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, 
!dbg [[DBG119]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG119]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG119]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr addrspace(1) [[TMP8]], i32 [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP9]], i1 [[TOBOOL]]) #[[ATTR3:[0-9]+]], !dbg [[DBG119]] +// CHECK1-NEXT: ret void, !dbg [[DBG119]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG120:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -361,66 +315,63 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_0]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG146]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG146]] -// 
CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG146]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG146]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG146]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG146]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG146]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG133:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG133]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG133]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG133]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP8:%.*]] = load 
ptr, ptr [[_TMP2]], align 8, !dbg [[DBG133]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB10:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG133]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG133]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG133]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP17]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG147]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP19]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG147]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG148:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG150:![0-9]+]] 
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG135:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG134]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP14]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG134]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP16:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP16]]), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP18:%.*]] = load [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: store [[STRUCT_ANON_0]] [[TMP18]], ptr [[TMP17]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB15]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__1, ptr null, ptr [[TMP17]]), !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP16]], i64 32), !dbg [[DBG134]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB17:[0-9]+]], i8 2, i1 true), !dbg [[DBG137:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG138:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG146]] +// CHECK1-NEXT: ret void, !dbg [[DBG133]] // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___debug__1 -// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG151:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG139:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -431,206 +382,161 @@ // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata 
[[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] -// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG165]] -// 
CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG165]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], 
metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG150:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META155:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG147]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG150]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG150]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG150]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG168]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG153]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG153]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG168]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG153]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG168]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG153]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG165]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG165]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG165]] 
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG158]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG174:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG174]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG174]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG178:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG178]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG177]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG180]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG183:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG183]] -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], 
align 8, !dbg [[DBG182]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG187:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG188:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG187]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG187]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG189:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG190]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG190]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG190]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG192:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds 
[10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG193]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG196:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG195]] -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG195]] -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG198]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG198]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG199:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG200:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG159:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG159]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG159]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META160:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG163]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg 
[[DBG162]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165:![0-9]+]] +// CHECK1-NEXT: store ptr [[A]], ptr [[G]], align 8, !dbg [[DBG165]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG168:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG168]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG167]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG170]] +// CHECK1-NEXT: store i32 5, ptr [[A]], align 4, !dbg [[DBG171:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG173:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG172]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG172]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG174:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG175:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG175]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG176:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG175]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 
x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG175]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG178:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG179:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG178]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG178]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG180:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG180]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG180]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG183:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG183]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG183]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG185:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP201:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP186:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG173]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG150]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG168]] -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG165]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG165]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG173]], !llvm.loop [[LOOP203:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG153]] +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG158]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG158]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG150]], !llvm.loop [[LOOP188:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG202:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG204:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG205:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata 
[[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG213]] -// CHECK1-NEXT: call void @__omp_outlined___debug__1(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG213]] -// CHECK1-NEXT: ret void, !dbg [[DBG213]] +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB14:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG189:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG190:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META193:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META220:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata 
[[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META221:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG222]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG222]] -// CHECK1-NEXT: ret void, !dbg [[DBG222]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[TMP3]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG198]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr addrspace(1) [[TMP7]], i32 [[TMP4]], ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG198]] +// CHECK1-NEXT: ret void, !dbg [[DBG198]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ -// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] { +// CHECK1-SAME: (ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -640,66 +546,65 @@ // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTTMP_OUTLINED_AGG_ARG:%.*]] = alloca [[STRUCT_ANON_1]], align 8 // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META228:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG236:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP7:%.*]] = 
addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG236]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG236]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG236]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG236]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG236]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr 
[[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG212]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG212]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB19:[0-9]+]], i8 2, i1 false, i1 true), !dbg [[DBG212]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG212]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG212]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB24:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG237:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP18]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG237]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP20]], align 8, !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__4, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG237]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr 
@[[GLOB26:[0-9]+]], i8 2, i1 true), !dbg [[DBG238:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3, !dbg [[DBG213]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP18:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_alloc_aggregate_arg(ptr [[DOTTMP_OUTLINED_AGG_ARG]], ptr [[TMP18]]), !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP20:%.*]] = load [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: store [[STRUCT_ANON_1]] [[TMP20]], ptr [[TMP19]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB24]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr null, ptr [[TMP19]]), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[TMP18]], i64 32), !dbg [[DBG213]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB26:[0-9]+]], i8 2, i1 true), !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG236]] +// CHECK1-NEXT: ret void, !dbg [[DBG212]] // // -// CHECK1-LABEL: define 
{{[^@]+}}@__omp_outlined___debug__3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 @@ -710,212 +615,160 @@ // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: 
call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META224:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225:![0-9]+]] // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] -// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META262:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG255]] -// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG263:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg 
[[DBG231:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG228]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG228]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB21:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG228]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG258]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG231]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG231]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG258]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG231]] // 
CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG231]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG236:![0-9]+]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG255]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG236]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG264:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG264]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG264]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG268:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG268]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG267]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG270]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata 
[[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG273:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG273]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG272]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG275]] -// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG276:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG277]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG277]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG279:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG280]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG280]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG280]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG282:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg 
[[DBG283:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG284:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG283]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG283]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG285]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]] -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG287:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG288:![0-9]+]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG289:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG288]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG288]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG288]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG288]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG290:![0-9]+]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 1, !dbg [[DBG290]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG291:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg 
[[DBG231]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG237:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG237]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG237]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG241:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG241]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG240]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[G]], align 8, !dbg [[DBG243]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 1, !dbg [[DBG246:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG246]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG245]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG248]] +// CHECK1-NEXT: store i32 5, ptr [[TMP4]], align 4, !dbg [[DBG249:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[TMP4]], align 4, !dbg [[DBG251:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG250]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG250]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG252:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG253:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG253]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG254:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG253]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG253]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG256:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG257:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG256]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG256]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG259:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG258]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] 
= getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG258]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG260:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP6]], i64 0, i64 0, !dbg [[DBG261:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP4]], align 4, !dbg [[DBG262:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG261]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG261]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX22]], align 4, !dbg [[DBG261]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP25]], 0, !dbg [[DBG261]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG263:![0-9]+]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP8]], align 1, !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG264:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP292:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP265:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], 
!dbg [[DBG263]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG228]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG258]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG255]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG255]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG263]], !llvm.loop [[LOOP294:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG231]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG236]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG236]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG228]], !llvm.loop [[LOOP267:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG293:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG295:![0-9]+]] -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], 
ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG296:![0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] -// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300]] -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG306]] -// CHECK1-NEXT: call void @__omp_outlined___debug__3(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG306]] -// CHECK1-NEXT: ret void, !dbg [[DBG306]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB23:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG266:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG268:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 
dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { +// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR7]] !dbg [[DBG269:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META272:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273:![0-9]+]] // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 
8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG315]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG315]] -// CHECK1-NEXT: ret void, !dbg [[DBG315]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG273]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG277:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG277]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG277]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr addrspace(1) [[TMP8]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG277]] +// CHECK1-NEXT: ret void, !dbg [[DBG277]] // diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -52,284 +52,290 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// 
CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP6]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP5]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN10]] 
+// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_6]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) 
+// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp 
sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP62]], 1 +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP61]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP63]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP64]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: [[TMP69:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP68]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP69]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[TMP72]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP73]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP69]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP75]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 -// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP79]], ptr [[TMP69]]) +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP65]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, ptr [[TMP66]], align 4 +// CHECK1-NEXT: [[TMP68:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP67]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP68]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP69]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[TMP70]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP71]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP68]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP72]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP74]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 +// CHECK1-NEXT: [[TMP77:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP76]], ptr [[TMP68]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP81:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP81]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP78]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP82:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP82]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP83]]) -// CHECK1-NEXT: 
[[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP85]], i32 1) -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP86]], align 8 -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP88]], align 8 -// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP90:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP90]], ptr [[TMP89]], align 8 -// CHECK1-NEXT: [[TMP91:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP92:%.*]] = load i32, ptr [[TMP91]], align 4 -// CHECK1-NEXT: [[TMP94:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP92]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP94]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP80]]) +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP82]], i32 1) +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP84]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP86:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP86]], ptr [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP88]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP89]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP95]], [[TMP96]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP91:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP90]], [[TMP91]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP98]] to i32 -// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP99]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] 
= icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP97]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP92]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP92]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP88]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP100:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP101:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP100]] monotonic, align 4 -// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP103:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP103]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP96:%.*]] = atomicrmw add ptr [[TMP2]], i32 
[[TMP95]] monotonic, align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP98:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP98]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP104:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP109:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP104]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP106]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP108:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP104]], i8 [[TMP107]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP109]] = extractvalue { i8, i1 } [[TMP108]], 0 -// CHECK1-NEXT: [[TMP110:%.*]] = 
extractvalue { i8, i1 } [[TMP108]], 1 -// CHECK1-NEXT: br i1 [[TMP110]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP99:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP104:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP99]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP101]] to i32 +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP102:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP103:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP99]], i8 [[TMP102]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP104]] = extractvalue { i8, i1 } [[TMP103]], 0 +// CHECK1-NEXT: [[TMP105:%.*]] = extractvalue { i8, i1 } [[TMP103]], 1 +// CHECK1-NEXT: br i1 [[TMP105]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP102]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP97]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP111:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP111]]) +// CHECK1-NEXT: [[TMP106:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP106]]) // CHECK1-NEXT: ret void // // @@ -340,8 +346,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -352,12 +358,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], 
align 4 // CHECK1-NEXT: ret void // // @@ -452,59 +458,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr 
[[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 
1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], 
align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// 
CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -516,38 +522,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// 
CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 
[[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -311,7 +311,7 @@ // CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // 
CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -595,15 +595,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK1-SAME: () #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -613,52 +615,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 33, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -729,27 +733,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -761,52 +765,58 @@ // CHECK1-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 8, i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr 
@[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] 
// CHECK1: omp.inner.for.end: @@ -814,19 +824,19 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK1-NEXT: store i64 [[TMP16]], ptr [[K_ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[K]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -838,33 +848,32 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -879,95 +888,103 @@ // CHECK1-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, 
!llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: 
.omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -978,27 +995,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1008,63 +1025,69 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: 
store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 
4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1085,8 
+1108,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1105,31 +1127,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 
10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1139,125 +1170,131 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// 
CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double 
[[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store float [[CONV9]], ptr 
[[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK1-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1592,7 +1629,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1602,23 +1639,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -1628,75 +1670,79 @@ // CHECK1-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: 
[[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK1-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// 
CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1711,45 +1757,53 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: ret void // // @@ -1759,30 +1813,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -1792,68 +1847,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i64 11, ptr [[I]], align 8 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1886,7 +1947,7 @@ // CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 
-// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -2166,15 +2227,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK3-SAME: () #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2184,52 +2247,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK3-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 33, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2300,24 +2365,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -2326,76 +2393,80 @@ // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 35, i32 0, i32 8, i32 1, i32 1) +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 35, i32 0, i32 
8, i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] 
+// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store 
i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 1, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[K]], align 8 +// CHECK3-NEXT: store i64 [[TMP21]], ptr [[TMP4]], align 8 // CHECK3-NEXT: br label 
[[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2407,33 +2478,32 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // 
CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2448,95 +2518,103 @@ // CHECK3-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK3-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK3-NEXT: store i64 [[CALL]], ptr 
[[DOTLINEAR_STEP]], align 8 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group 
[[ACC_GRP30]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK3-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK3-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD12:%.*]] 
= add nsw i32 [[CONV11]], 1 // CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label 
[[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -2547,27 +2625,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2577,63 +2655,69 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2654,8 +2738,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2674,31 +2757,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2708,125 +2800,131 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// 
CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double 
[[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store float [[CONV9]], ptr 
[[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK3-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3161,7 +3259,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3171,23 +3269,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -3197,75 +3300,79 @@ // CHECK3-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: 
[[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK3-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// 
CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3280,45 +3387,53 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -3328,30 +3443,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -3361,68 +3477,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i64 11, ptr [[I]], align 8 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3455,7 +3577,7 @@ // CHECK5-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK5-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 
-// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 @@ -3739,15 +3861,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK5-SAME: () #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3757,52 +3881,54 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK5-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 33, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3873,27 +3999,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP2]], ptr [[K_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[K_ADDR]], align 8 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[K:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[K_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -3905,52 +4031,58 @@ // CHECK5-NEXT: [[K1:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[K]], ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[K_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[TMP0]], ptr [[DOTLINEAR_START]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[K]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[K]], align 8 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]]) -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP2]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP3]], 0 +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK5-NEXT: store i32 
[[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP14]], 3 // CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] // CHECK5-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3958,19 +4090,19 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 1, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK5-NEXT: store i64 [[TMP16]], ptr [[K_ADDR]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[K1]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[K]], align 8 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -3982,33 +4114,32 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca 
i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -4023,95 +4154,103 @@ // CHECK5-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK5-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP4]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK5-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK5-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, 
!llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK5-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK5-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK5-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK5-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: .omp.linear.pu: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK5-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: 
.omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -4122,27 +4261,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4152,63 +4291,69 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: 
store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 
4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK5-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group 
[[ACC_GRP32]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK5-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4229,8 
+4374,7 @@ // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4249,31 +4393,40 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 
10 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4283,125 +4436,131 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 10 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// 
CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK5-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK5-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK5-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK5-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK5-NEXT: [[CONV13:%.*]] = fptrunc double 
[[ADD12]] to float -// CHECK5-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK5-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK5-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK5-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK5-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK5-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store float [[CONV9]], ptr 
[[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK5-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK5-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK5-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK5-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK5-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK5-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK5-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK5-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK5-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4757,8 +4916,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -4772,25 +4930,33 @@ // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 
+// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..10, ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK5-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] +// CHECK5-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -4798,16 +4964,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 
8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -4817,136 +4980,144 @@ // CHECK5-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, 
i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: 
[[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK5-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr 
[[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], ptr [[A3]], align 8, !nontemporal !39, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK5-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK5: cond.true9: -// CHECK5-NEXT: br label [[COND_END11:%.*]] -// CHECK5: cond.false10: -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: br label [[COND_END11]] -// CHECK5: cond.end11: -// CHECK5-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK5-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK5: omp.inner.for.cond13: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK5: omp.inner.for.body15: -// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK5-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK5-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK5-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD19]], ptr [[A20]], align 8 -// CHECK5-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: 
[[TMP26:%.*]] = load double, ptr [[A21]], align 8 -// CHECK5-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC22]], ptr [[A21]], align 8 -// CHECK5-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK5-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP27]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i64 1 -// CHECK5-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK5: omp.body.continue26: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK5-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end29: +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK5: cond.true8: +// CHECK5-NEXT: br label [[COND_END10:%.*]] +// CHECK5: cond.false9: +// CHECK5-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: br label [[COND_END10]] +// CHECK5: cond.end10: +// CHECK5-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK5-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: 
[[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK5: omp.inner.for.cond12: +// CHECK5-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK5-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK5: omp.inner.for.body14: +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK5-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK5-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK5-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK5-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD18]], ptr [[A19]], align 8 +// CHECK5-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 8 +// CHECK5-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC21]], ptr [[A20]], align 8 +// CHECK5-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK5-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP36]] +// CHECK5-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], i64 1 +// CHECK5-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK5: omp.body.continue25: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK5: omp.inner.for.inc26: +// 
CHECK5-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK5-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end28: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK5-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4961,45 +5132,53 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: 
store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK5-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK5-NEXT: ret void // // @@ -5009,30 +5188,31 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -5042,68 +5222,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK5-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK5-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i64 11, ptr [[I]], align 8 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5136,7 +5322,7 @@ // CHECK7-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 // CHECK7-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 
-// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 @@ -5416,15 +5602,17 @@ // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK7-SAME: () #[[ATTR2:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5434,52 +5622,54 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK7-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 33, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5550,24 +5740,26 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[K:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i64, align 8 @@ -5576,76 +5768,80 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[K1:%.*]] = alloca i64, align 8 +// CHECK7-NEXT: [[K:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 -// CHECK7-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK7-NEXT: store i64 [[TMP5]], ptr [[DOTLINEAR_START]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 8, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 1073741859, i32 0, i32 8, i32 1, i32 1) +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP7]]) +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 
1073741859, i32 0, i32 8, i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP3]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0 +// CHECK7-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB2]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 -// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK7-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL1:%.*]] = mul nsw i32 [[TMP14]], 3 +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL1]] to i64 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP13]], [[CONV]] +// CHECK7-NEXT: store i64 [[ADD]], ptr [[K]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 
[[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK7-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 1, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK7-NEXT: br i1 [[TMP16]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[K1]], align 8 -// CHECK7-NEXT: store i64 [[TMP17]], ptr [[TMP0]], align 8 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[K]], align 8 +// CHECK7-NEXT: store i64 [[TMP21]], ptr [[TMP4]], align 8 // 
CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5657,33 +5853,32 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..3, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // 
CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -5698,95 +5893,103 @@ // CHECK7-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK7-NEXT: [[CALL:%.*]] = call noundef i64 @_Z7get_valv() // CHECK7-NEXT: store i64 [[CALL]], ptr 
[[DOTLINEAR_STEP]], align 8 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP3]]) -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3]], i32 [[TMP10]]) +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group 
[[ACC_GRP30]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK7-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK7-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK7-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK7-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK7-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK7-NEXT: [[TMP23:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK7-NEXT: [[ADD12:%.*]] 
= add nsw i32 [[CONV11]], 1 // CHECK7-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK7-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK7-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label 
[[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK7: .omp.linear.pu: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK7-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK7-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK7: .omp.linear.pu.done: // CHECK7-NEXT: ret void @@ -5797,27 +6000,27 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..4, i32 [[TMP1]], i32 [[TMP3]]) -// CHECK7-NEXT: ret void +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) +// CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5827,63 +6030,69 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store 
i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: 
cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK7-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group 
[[ACC_GRP33]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5904,8 +6113,7 @@ // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -5924,31 +6132,40 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..7, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 
+// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5958,125 +6175,131 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], 
ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK7-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// 
CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK7-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK7-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK7-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK7-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK7-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK7-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK7-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK7-NEXT: [[CONV13:%.*]] = fptrunc double 
[[ADD12]] to float -// CHECK7-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK7-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK7-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK7-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK7-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store float [[CONV9]], ptr 
[[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK7-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK7-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK7-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK7-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK7-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK7-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK7-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK7-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK7-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) -// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK7-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK7-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK7-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6432,8 +6655,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -6447,25 +6669,33 @@ // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 
+// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..10, ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK7-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] +// CHECK7-NEXT: call void @.omp_outlined..10(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR4]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -6473,16 +6703,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 
4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -6492,136 +6719,144 @@ // CHECK7-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, 
i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: 
[[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr 
[[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC]], ptr [[A3]], align 4, !nontemporal !40, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK7-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK7-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK7: cond.true9: -// CHECK7-NEXT: br label [[COND_END11:%.*]] -// CHECK7: cond.false10: -// CHECK7-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: br label [[COND_END11]] -// CHECK7: cond.end11: -// CHECK7-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK7-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK7: omp.inner.for.cond13: -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK7-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK7: omp.inner.for.body15: -// CHECK7-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK7-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK7-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK7-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD19]], ptr [[A20]], align 4 -// CHECK7-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: 
[[TMP26:%.*]] = load double, ptr [[A21]], align 4 -// CHECK7-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC22]], ptr [[A21]], align 4 -// CHECK7-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK7-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP27]] -// CHECK7-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i32 1 -// CHECK7-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK7: omp.body.continue26: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK7-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end29: +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK7-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK7: cond.true8: +// CHECK7-NEXT: br label [[COND_END10:%.*]] +// CHECK7: cond.false9: +// CHECK7-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: br label [[COND_END10]] +// CHECK7: cond.end10: +// CHECK7-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK7-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: 
[[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK7: omp.inner.for.cond12: +// CHECK7-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK7-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK7: omp.inner.for.body14: +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK7-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK7-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK7-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK7-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD18]], ptr [[A19]], align 4 +// CHECK7-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 4 +// CHECK7-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC21]], ptr [[A20]], align 4 +// CHECK7-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK7-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP36]] +// CHECK7-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX23]], i32 1 +// CHECK7-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK7: omp.body.continue25: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK7: omp.inner.for.inc26: +// 
CHECK7-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK7-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end28: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK7-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6636,45 +6871,53 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: 
store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK7-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..13, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK7-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK7-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK7-NEXT: ret void // // @@ -6684,30 +6927,31 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..16, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK7-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = 
alloca i64, align 8 @@ -6717,68 +6961,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK7-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK7-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK7-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK7-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i64 11, ptr [[I]], align 8 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8800,15 +9050,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8818,52 +9070,54 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 33, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -8877,33 +9131,32 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -8918,95 +9171,103 @@ // CHECK17-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call noundef i64 
@_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK17-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // 
CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL5:%.*]] = mul 
i64 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK17-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK17-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK17-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK17-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK17-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK17-NEXT: [[TMP23:%.*]] = load 
i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK17-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK17-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: 
-// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK17-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK17-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -9023,27 +9284,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 
8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9053,63 +9314,69 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV:%.*]] = 
trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9130,8 +9397,7 @@ // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9150,31 +9416,40 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: 
[[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK17-NEXT: store ptr [[TMP7]], ptr 
[[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9184,125 +9459,131 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: 
[[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK17-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK17-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK17-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// 
CHECK17-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] -// 
CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK17-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK17-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CONV17:%.*]] = sext i8 
[[TMP39]] to i32 +// CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK17-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK17-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK17-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK17-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK17-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK17-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9317,45 +9598,53 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load 
i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: ret void // // @@ -9367,7 +9656,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9377,23 +9666,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9403,75 +9697,79 @@ // CHECK17-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CMP1:%.*]] = 
icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK17-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store 
i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A2]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK17-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9485,30 +9783,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca i64, align 8 // 
CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -9518,68 +9817,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 
[[TMP3]], 6 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK17-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i64 
[[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP29]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK17-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i64 11, ptr [[I]], align 8 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9590,15 +9895,17 @@ // CHECK19-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK19-SAME: () #[[ATTR0:[0-9]+]] { // CHECK19-NEXT: entry: -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9608,52 +9915,54 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP12]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 33, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9667,33 +9976,32 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -9708,95 +10016,103 @@ // CHECK19-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK19-NEXT: [[CALL:%.*]] = call noundef i64 
@_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK19-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // 
CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL5:%.*]] = mul 
i64 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK19-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK19-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK19-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK19-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK19-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK19-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK19-NEXT: [[TMP23:%.*]] = load 
i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK19-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK19-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK19-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: 
-// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK19-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -9813,27 +10129,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], 
align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9843,63 +10159,69 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = 
trunc i32 [[ADD]] to i16 // CHECK19-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9920,8 +10242,7 @@ // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -9940,31 +10261,40 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr 
[[TMP17]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = 
alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9974,125 +10304,131 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: 
[[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK19-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK19-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK19-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK19-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// 
CHECK19-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] -// 
CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK19-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK19-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[CONV17:%.*]] = sext i8 
[[TMP39]] to i32 +// CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK19-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK19-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK19-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK19-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK19-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK19-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK19-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK19-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK19-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10107,45 +10443,53 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: ret void // // @@ -10157,7 +10501,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10167,23 +10511,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10193,75 +10542,79 @@ // CHECK19-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP6]], 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP13]], 3 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CMP1:%.*]] = 
icmp ule i64 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP18]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK19-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK19-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store 
i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A2]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2, !llvm.access.group [[ACC_GRP27]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK19-NEXT: [[ADD5:%.*]] = add i64 [[TMP22]], 1 +// CHECK19-NEXT: store i64 [[ADD5]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10275,30 +10628,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK19-NEXT: [[TMP:%.*]] = alloca i64, align 4 // 
CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -10308,68 +10662,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 
[[TMP3]], 6 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK19-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK19-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK19-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 
[[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK19-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP30]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP18]], 1 // CHECK19-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i64 11, ptr [[I]], align 8 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10380,15 +10740,17 @@ // CHECK21-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK21-SAME: () #[[ATTR0:[0-9]+]] { // CHECK21-NEXT: entry: -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10398,52 +10760,54 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP11]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 33, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10457,33 +10821,32 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP1]], i64 [[TMP3]], i64 [[TMP5]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[LIN:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[LIN_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -10498,95 +10861,103 @@ // CHECK21-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[LIN]], ptr [[LIN_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK21-NEXT: [[CALL:%.*]] = call noundef i64 
@_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK21-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK21-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK21-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // 
CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL5:%.*]] = mul 
i64 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK21-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK21-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK21-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK21-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK21-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK21-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK21-NEXT: [[TMP23:%.*]] = load 
i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK21-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK21-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP17]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK21-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK21-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK21-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK21-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP17]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK21-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: 
-// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK21-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK21: .omp.linear.pu: -// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK21-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK21-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK21-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK21-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK21-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK21: .omp.linear.pu.done: // CHECK21-NEXT: ret void @@ -10603,27 +10974,27 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], 
align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10633,63 +11004,69 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // 
CHECK21-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK21-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK21-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK21-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP20]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK21-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10710,8 +11087,7 @@ // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10730,31 +11106,40 @@ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 
-// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK21-NEXT: store 
ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10764,125 +11149,131 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK21-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK21-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK21: omp.dispatch.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK21-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK21-NEXT: 
[[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK21-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK21-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK21-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK21-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK21-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK21-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK21-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// 
CHECK21-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK21-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK21-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK21-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK21-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK21-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] -// 
CHECK21-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK21-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK21-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK21-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK21-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[CONV17:%.*]] = sext i8 
[[TMP39]] to i32 +// CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK21-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP23]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK21-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: -// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK21-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK21-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK21-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: [[TMP43:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK21-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK21: omp.dispatch.end: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK21-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK21-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK21-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10897,45 +11288,53 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = 
load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK21-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK21-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK21-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: ret void // // @@ -10948,8 +11347,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -10963,25 +11361,33 @@ // CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 
+// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK21-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK21-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -10989,16 +11395,13 @@ // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: 
[[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11008,136 +11411,144 @@ // CHECK21-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK21-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: 
[[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// 
CHECK21-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK21-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], ptr [[A3]], align 8, !nontemporal !27, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP26]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] -// CHECK21-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK21-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK21-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK21-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP26]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK21-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK21: cond.true9: -// CHECK21-NEXT: br label [[COND_END11:%.*]] -// CHECK21: cond.false10: -// CHECK21-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: br label [[COND_END11]] -// CHECK21: cond.end11: -// CHECK21-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK21-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK21: omp.inner.for.cond13: -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK21-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK21: omp.inner.for.body15: -// CHECK21-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK21-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK21-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK21-NEXT: [[A20:%.*]] = getelementptr inbounds 
[[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD19]], ptr [[A20]], align 8 -// CHECK21-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 8 -// CHECK21-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC22]], ptr [[A21]], align 8 -// CHECK21-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK21-NEXT: [[TMP27:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP27]] -// CHECK21-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i64 1 -// CHECK21-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK21: omp.body.continue26: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK21: omp.inner.for.inc27: -// CHECK21-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK21-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]] -// CHECK21: omp.inner.for.end29: +// CHECK21-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK21-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK21: cond.true8: +// CHECK21-NEXT: br label [[COND_END10:%.*]] +// CHECK21: cond.false9: +// CHECK21-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// 
CHECK21-NEXT: br label [[COND_END10]] +// CHECK21: cond.end10: +// CHECK21-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK21-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK21: omp.inner.for.cond12: +// CHECK21-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK21-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK21: omp.inner.for.body14: +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK21-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK21-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK21-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK21-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD18]], ptr [[A19]], align 8 +// CHECK21-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 8 +// CHECK21-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC21]], ptr [[A20]], align 8 +// CHECK21-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK21-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP36]] +// CHECK21-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr 
[[ARRAYIDX23]], i64 1 +// CHECK21-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK21: omp.body.continue25: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK21: omp.inner.for.inc26: +// CHECK21-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK21-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK21: omp.inner.for.end28: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK21-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK21-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11151,30 +11562,31 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: 
[[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11184,68 +11596,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK21-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: 
cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK21-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK21-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK21-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 
4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK21-NEXT: [[ADD6:%.*]] = add 
nsw i64 [[TMP18]], 1 // CHECK21-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i64 11, ptr [[I]], align 8 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11256,15 +11674,17 @@ // CHECK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96 // CHECK23-SAME: () #[[ATTR0:[0-9]+]] { // CHECK23-NEXT: entry: -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11274,52 +11694,54 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 5, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 5 +// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 5 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 5, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 33, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11333,33 +11755,32 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, 
align 4 // CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[LIN:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[LIN_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[LIN:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -11374,95 +11795,103 @@ // CHECK23-NEXT: [[A3:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[LIN]], ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTLINEAR_START]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTLINEAR_START1]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTLINEAR_START1]], align 4 // CHECK23-NEXT: [[CALL:%.*]] = call noundef i64 
@_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK23-NEXT: store i64 [[CALL]], ptr [[DOTLINEAR_STEP]], align 8 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK23-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP3]]) -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP4]], 3 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP10]]) +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP11]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // 
CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP14]], [[TMP15]] // CHECK23-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL5:%.*]] = mul 
i64 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL5:%.*]] = mul i64 [[TMP18]], [[TMP19]] // CHECK23-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK23-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 // CHECK23-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV7:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[MUL8:%.*]] = mul i64 [[TMP21]], [[TMP22]] // CHECK23-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK23-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 // CHECK23-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK23-NEXT: [[TMP23:%.*]] = load 
i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[CONV11:%.*]] = sext i16 [[TMP23]] to i32 // CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK23-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK23-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: store i16 [[CONV13]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK23-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 +// CHECK23-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK23-NEXT: [[ADD14:%.*]] = add i64 [[TMP24]], 1 // CHECK23-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK23-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: 
-// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK23-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK23: .omp.linear.pu: -// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[LIN2]], align 4 -// CHECK23-NEXT: store i32 [[TMP22]], ptr [[LIN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[A3]], align 4 -// CHECK23-NEXT: store i32 [[TMP23]], ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[LIN2]], align 4 +// CHECK23-NEXT: store i32 [[TMP29]], ptr [[LIN]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[A3]], align 4 +// CHECK23-NEXT: store i32 [[TMP30]], ptr [[A]], align 4 // CHECK23-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK23: .omp.linear.pu.done: // CHECK23-NEXT: ret void @@ -11479,27 +11908,27 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], 
align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11509,63 +11938,69 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i16, align 2 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 3, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 3 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 4 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // 
CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 // CHECK23-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: store i16 [[CONV5]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK23-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i16 22, ptr [[IT]], align 2 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11586,8 +12021,7 @@ // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11606,31 +12040,40 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 
-// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store 
ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11640,125 +12083,131 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i8, align 1 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK23-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK23-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK23: omp.dispatch.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 25 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 25 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 25, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK23-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK23-NEXT: 
[[CONV:%.*]] = trunc i32 [[SUB]] to i8 // CHECK23-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV3:%.*]] = fpext float [[TMP33]] to double +// CHECK23-NEXT: [[ADD4:%.*]] = fadd double [[CONV3]], 1.000000e+00 +// CHECK23-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK23-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double // CHECK23-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK23-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK23-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// 
CHECK23-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK23-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK23-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK23-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK23-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK23-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] -// 
CHECK23-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK23-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK23-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK23-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK23-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[CONV17:%.*]] = sext i8 
[[TMP39]] to i32 +// CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK23-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK23-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK23-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK23-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK23-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: -// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK23-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK23-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK23-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK23-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: [[TMP43:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK23-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK23: omp.dispatch.end: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) -// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK23-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK23-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK23-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i8 96, ptr [[IT]], align 1 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11773,45 +12222,53 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK23-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK23-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK23-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: ret void // // @@ -11824,8 +12281,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) @@ -11839,25 +12295,33 @@ // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 
+// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP4]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP13]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK23-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] +// CHECK23-NEXT: call void @.omp_outlined..5(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -11865,16 +12329,13 @@ // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: 
[[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -11884,136 +12345,144 @@ // CHECK23-NEXT: [[IT:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK23-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP13]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: 
[[CMP:%.*]] = icmp ugt i64 [[TMP7]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP16]], 3 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 3, [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// 
CHECK23-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i64 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 +// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP21]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK23-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double // CHECK23-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK23-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, ptr [[A3]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], ptr [[A3]], align 4, !nontemporal !28, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP27]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] -// CHECK23-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK23-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] +// CHECK23-NEXT: [[ADD6:%.*]] = add i64 [[TMP25]], 1 +// CHECK23-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP27]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[TMP19]], 3 -// CHECK23-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK23: cond.true9: -// CHECK23-NEXT: br label [[COND_END11:%.*]] -// CHECK23: cond.false10: -// CHECK23-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: br label [[COND_END11]] -// CHECK23: cond.end11: -// CHECK23-NEXT: [[COND12:%.*]] = phi i64 [ 3, [[COND_TRUE9]] ], [ [[TMP20]], [[COND_FALSE10]] ] -// CHECK23-NEXT: store i64 [[COND12]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13:%.*]] -// CHECK23: omp.inner.for.cond13: -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP14:%.*]] = icmp ule i64 [[TMP22]], [[TMP23]] -// CHECK23-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY15:%.*]], label [[OMP_INNER_FOR_END29:%.*]] -// CHECK23: omp.inner.for.body15: -// CHECK23-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[MUL16:%.*]] = mul i64 [[TMP24]], 400 -// CHECK23-NEXT: [[SUB17:%.*]] = sub i64 2000, [[MUL16]] -// CHECK23-NEXT: store i64 [[SUB17]], ptr [[IT]], align 8 -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV18:%.*]] = sitofp i32 [[TMP25]] to double -// CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[CONV18]], 1.500000e+00 -// CHECK23-NEXT: [[A20:%.*]] = getelementptr inbounds 
[[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD19]], ptr [[A20]], align 4 -// CHECK23-NEXT: [[A21:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP26:%.*]] = load double, ptr [[A21]], align 4 -// CHECK23-NEXT: [[INC22:%.*]] = fadd double [[TMP26]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC22]], ptr [[A21]], align 4 -// CHECK23-NEXT: [[CONV23:%.*]] = fptosi double [[INC22]] to i16 -// CHECK23-NEXT: [[TMP27:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP27]] -// CHECK23-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX24]], i32 1 -// CHECK23-NEXT: store i16 [[CONV23]], ptr [[ARRAYIDX25]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] -// CHECK23: omp.body.continue26: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] -// CHECK23: omp.inner.for.inc27: -// CHECK23-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 -// CHECK23-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP31:![0-9]+]] -// CHECK23: omp.inner.for.end29: +// CHECK23-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8u(ptr @[[GLOB1]], i32 [[TMP27]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP28]], 3 +// CHECK23-NEXT: br i1 [[CMP7]], label [[COND_TRUE8:%.*]], label [[COND_FALSE9:%.*]] +// CHECK23: cond.true8: +// CHECK23-NEXT: br label [[COND_END10:%.*]] +// CHECK23: cond.false9: +// CHECK23-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// 
CHECK23-NEXT: br label [[COND_END10]] +// CHECK23: cond.end10: +// CHECK23-NEXT: [[COND11:%.*]] = phi i64 [ 3, [[COND_TRUE8]] ], [ [[TMP29]], [[COND_FALSE9]] ] +// CHECK23-NEXT: store i64 [[COND11]], ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12:%.*]] +// CHECK23: omp.inner.for.cond12: +// CHECK23-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP13:%.*]] = icmp ule i64 [[TMP31]], [[TMP32]] +// CHECK23-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY14:%.*]], label [[OMP_INNER_FOR_END28:%.*]] +// CHECK23: omp.inner.for.body14: +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[MUL15:%.*]] = mul i64 [[TMP33]], 400 +// CHECK23-NEXT: [[SUB16:%.*]] = sub i64 2000, [[MUL15]] +// CHECK23-NEXT: store i64 [[SUB16]], ptr [[IT]], align 8 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[B]], align 4 +// CHECK23-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP34]] to double +// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[CONV17]], 1.500000e+00 +// CHECK23-NEXT: [[A19:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD18]], ptr [[A19]], align 4 +// CHECK23-NEXT: [[A20:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP35:%.*]] = load double, ptr [[A20]], align 4 +// CHECK23-NEXT: [[INC21:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC21]], ptr [[A20]], align 4 +// CHECK23-NEXT: [[CONV22:%.*]] = fptosi double [[INC21]] to i16 +// CHECK23-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP36]] +// CHECK23-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, ptr 
[[ARRAYIDX23]], i32 1 +// CHECK23-NEXT: store i16 [[CONV22]], ptr [[ARRAYIDX24]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE25:%.*]] +// CHECK23: omp.body.continue25: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC26:%.*]] +// CHECK23: omp.inner.for.inc26: +// CHECK23-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[ADD27:%.*]] = add i64 [[TMP37]], 1 +// CHECK23-NEXT: store i64 [[ADD27]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND12]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK23: omp.inner.for.end28: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK23-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK23-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 400, ptr [[IT]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12027,30 +12496,31 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: 
[[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK23-NEXT: [[TMP:%.*]] = alloca i64, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 @@ -12060,68 +12530,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i64, align 8 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK23-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP3]], 6 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], 6 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: 
cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i64 [ 6, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK23-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK23-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK23-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP14]], 3 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK23-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 
4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK23-NEXT: [[ADD6:%.*]] = add 
nsw i64 [[TMP18]], 1 // CHECK23-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP33]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i64 11, ptr [[I]], align 8 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -577,7 +577,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -585,20 +585,22 @@ // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], 
align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -606,21 +608,25 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -630,23 +636,26 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -654,16 +663,18 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -672,6 +683,7 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -680,13 +692,13 @@ // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: @@ -694,29 +706,36 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -724,34 +743,39 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] +// CHECK1-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -760,36 +784,42 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1195,7 +1225,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1203,20 +1233,22 @@ // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1224,21 +1256,25 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1248,23 +1284,26 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1272,16 +1311,18 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1290,6 +1331,7 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1298,13 +1340,13 @@ // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -1312,29 +1354,36 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1342,34 +1391,39 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] +// CHECK3-NEXT: call void @.omp_outlined..8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1378,36 +1432,42 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1422,6 +1482,7 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1430,13 +1491,13 @@ // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK9-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1444,29 +1505,36 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1476,7 +1544,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1484,20 +1552,22 @@ // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1505,21 +1575,25 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1529,23 +1603,26 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1553,16 +1630,18 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1571,34 +1650,39 @@ // CHECK9-SAME: (i64 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 
[[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] +// CHECK9-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 
4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1607,36 +1691,42 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1644,6 +1734,7 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -1652,13 +1743,13 @@ // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK11-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1666,29 +1757,36 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1698,7 +1796,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -1706,20 +1804,22 @@ // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, 
ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1727,21 +1827,25 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1751,23 +1855,26 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[TMP1]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -1775,16 +1882,18 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 
4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1793,34 +1902,39 @@ // CHECK11-SAME: (i32 noundef [[A:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 // 
CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] +// CHECK11-NEXT: call void @.omp_outlined..4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // // @@ -1829,35 +1943,41 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -546,7 +546,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -554,29 +554,35 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -585,25 +591,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -612,21 +623,25 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -634,40 +649,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void 
@__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -677,8 +700,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -686,32 +708,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1079,7 +1108,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1087,29 +1116,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1118,25 +1153,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1145,21 +1185,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1167,40 +1211,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void 
@__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1210,8 +1262,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1219,32 +1270,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1259,21 +1317,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 
[[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1281,21 +1343,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1305,7 +1371,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1313,29 +1379,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1344,25 +1416,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1370,19 +1447,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR0]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // 
CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1392,8 +1473,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1401,32 +1481,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 
[[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1434,21 +1521,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1456,21 +1547,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1480,7 +1575,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1488,29 +1583,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: 
[[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1519,25 +1620,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1545,19 +1651,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR0]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1567,8 +1677,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1576,31 +1685,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -52,230 +52,236 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP24:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP23:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr 
[[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 
[[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// 
CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], 
align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr 
(i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr 
@[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP55]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP58]], ptr [[TMP57]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 -// CHECK1-NEXT: [[TMP61:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP60]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP61]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP63]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP65]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP61]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP67]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 0, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 
+// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP57]], ptr [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, ptr [[TMP58]], align 4 +// CHECK1-NEXT: [[TMP60:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP59]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP60]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP61]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP63:%.*]] = load ptr, ptr [[TMP62]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP63]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP60]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP64]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP66:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 +// CHECK1-NEXT: [[TMP69:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP68]], ptr [[TMP60]]) // CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -// CHECK1-NEXT: [[TMP72:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP71]], ptr [[TMP61]]) -// CHECK1-NEXT: [[TMP73:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP74]], i32 0) -// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP75]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP83]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP71]], i32 0) +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP72]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr [[TMP76]], align 4 +// CHECK1-NEXT: [[TMP78:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB2:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP78]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP84]], [[TMP85]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE18:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP79]], [[TMP80]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP81]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE17:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] 
], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST12:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT16:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP87]] to i32 -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV]], [[CONV13]] -// CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT16]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST12]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST11:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT15:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP82:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP82]] to i32 +// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV12:%.*]] = sext i8 [[TMP83]] to i32 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV]], [[CONV12]] +// CHECK1-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i8 +// CHECK1-NEXT: store i8 [[CONV14]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT15]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST11]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE17:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT16]], [[TMP86]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_DONE18]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: 
omp.arraycpy.done18: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE16:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT15]], [[TMP81]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE16]], label [[OMP_ARRAYCPY_DONE17]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done17: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB2]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP89:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP90:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP89]] monotonic, align 4 -// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY19:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY19]], label [[OMP_ARRAYCPY_DONE32:%.*]], label [[OMP_ARRAYCPY_BODY20:%.*]] -// CHECK1: omp.arraycpy.body20: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST21:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT30:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST22:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT29:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP92]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]] monotonic, align 1 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP84]] monotonic, align 4 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY18:%.*]] = icmp eq ptr 
[[ARRAYIDX1]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY18]], label [[OMP_ARRAYCPY_DONE31:%.*]], label [[OMP_ARRAYCPY_BODY19:%.*]] +// CHECK1: omp.arraycpy.body19: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST20:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT29:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST21:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT28:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV22:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP93:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY20]] ], [ [[TMP98:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP93]], ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[_TMP24]], align 1 -// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP94]] to i32 -// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP95]] to i32 -// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[CONV25]], [[CONV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = trunc i32 [[ADD27]] to i8 -// CHECK1-NEXT: store i8 [[CONV28]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP96:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP97:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i8 [[TMP93]], i8 [[TMP96]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP98]] = extractvalue { i8, i1 } [[TMP97]], 0 -// CHECK1-NEXT: [[TMP99:%.*]] = extractvalue { i8, i1 } [[TMP97]], 1 -// CHECK1-NEXT: br i1 [[TMP99]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP88:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY19]] 
], [ [[TMP93:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP88]], ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[_TMP23]], align 1 +// CHECK1-NEXT: [[CONV24:%.*]] = sext i8 [[TMP89]] to i32 +// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP90]] to i32 +// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[CONV24]], [[CONV25]] +// CHECK1-NEXT: [[CONV27:%.*]] = trunc i32 [[ADD26]] to i8 +// CHECK1-NEXT: store i8 [[CONV27]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP92:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i8 [[TMP88]], i8 [[TMP91]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP93]] = extractvalue { i8, i1 } [[TMP92]], 0 +// CHECK1-NEXT: [[TMP94:%.*]] = extractvalue { i8, i1 } [[TMP92]], 1 +// CHECK1-NEXT: br i1 [[TMP94]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST22]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT30]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST21]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE31:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT29]], [[TMP91]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_DONE32]], label [[OMP_ARRAYCPY_BODY20]] -// CHECK1: omp.arraycpy.done32: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT28]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST21]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT29]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST20]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE30:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT28]], [[TMP86]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE30]], label [[OMP_ARRAYCPY_DONE31]], label [[OMP_ARRAYCPY_BODY19]] +// CHECK1: omp.arraycpy.done31: // CHECK1-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP100:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP100]]) +// CHECK1-NEXT: [[TMP95:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP95]]) // CHECK1-NEXT: ret void // // @@ -286,8 +292,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -298,12 +304,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -398,59 +404,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], 
align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr 
@__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load 
ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr 
i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -462,38 +468,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], 
ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -322,7 +322,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -395,13 +395,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 
8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -706,7 +706,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
@@ -714,22 +714,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -808,8 +813,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !24 @@ -852,27 +857,32 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -880,29 +890,34 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -911,39 +926,45 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr 
[[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK1-NEXT: ret void // // @@ -959,7 +980,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -977,82 +998,95 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK1-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK1-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 
+// CHECK1-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK1-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK1-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK1-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr 
[[Y]], align 8 -// CHECK1-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK1-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK1-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK1-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK1-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK1-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK1-NEXT: ret void // // @@ -1060,41 +1094,49 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i64 [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK1-NEXT: ret void // // @@ -1102,38 +1144,47 @@ // CHECK1-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[NN_ADDR]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[NN]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1190,23 +1241,28 @@ // CHECK1-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, i64 [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 // CHECK1-NEXT: ret void // // @@ -1537,7 +1593,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1547,48 +1603,57 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..23, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = 
sitofp i32 [[TMP4]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 
[[TMP6]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK1-NEXT: ret void // // @@ -1599,59 +1664,67 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..26, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1662,47 +1735,54 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..29, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: ret void // @@ -1734,7 +1814,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -1805,13 +1885,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2118,7 +2198,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2126,22 +2206,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2220,8 +2305,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !25 @@ -2264,27 +2349,32 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -2292,29 +2382,34 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2323,39 +2418,45 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr 
[[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK3-NEXT: ret void // // @@ -2371,7 +2472,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2389,82 +2490,95 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK3-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK3-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 
+// CHECK3-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK3-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK3-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK3-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK3-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK3-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i8, ptr 
[[Y]], align 4 -// CHECK3-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK3-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK3-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK3-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK3-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK3-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK3-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK3-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -2472,41 +2586,49 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: 
entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i32 [[TMP1]]) +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK3-NEXT: ret void // // @@ -2514,38 +2636,47 @@ // CHECK3-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[NN_ADDR]]) +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[NN]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -2601,23 +2732,28 @@ // CHECK3-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, i32 [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[F:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 // CHECK3-NEXT: ret void // // @@ -2948,7 +3084,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2958,48 +3094,57 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..23, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = 
sitofp i32 [[TMP4]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 
[[TMP6]] -// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK3-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK3-NEXT: ret void // // @@ -3010,59 +3155,67 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..26, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK3-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -3073,47 +3226,54 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..29, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..29, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..29 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: ret void // @@ -3131,7 +3291,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3139,22 +3299,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3162,29 +3327,34 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// 
CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3193,39 +3363,45 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK9-NEXT: ret void // // @@ -3241,7 +3417,7 @@ // CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3259,82 +3435,95 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr 
[[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK9-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK9-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.000000e+00 // CHECK9-NEXT: [[CONV6:%.*]] = fptrunc double 
[[ADD5]] to float -// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK9-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK9-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP12]] -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i64 3 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 8 -// CHECK9-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK9-NEXT: store i64 [[ADD17]], ptr [[X]], align 8 -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 8 -// CHECK9-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK9-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK9-NEXT: [[CONV20:%.*]] = trunc i32 
[[ADD19]] to i8 -// CHECK9-NEXT: store i8 [[CONV20]], ptr [[Y]], align 8 +// CHECK9-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP23]] +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8 +// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK9-NEXT: store i64 [[ADD13]], ptr [[X]], align 8 +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8 +// CHECK9-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK9-NEXT: store i8 [[CONV16]], ptr [[Y]], align 8 // CHECK9-NEXT: ret void // // @@ -3342,41 +3531,49 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 
8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i64 [[TMP1]]) +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK9-NEXT: ret void // // @@ -3384,38 +3581,47 @@ // CHECK9-SAME: (i64 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[NN_ADDR]]) +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[NN]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3423,23 +3629,28 @@ // CHECK9-SAME: (i64 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, i64 [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[F:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 // CHECK9-NEXT: ret void // // @@ -3450,59 +3661,67 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// 
CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..9, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -3515,7 +3734,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3525,48 +3744,57 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = 
sitofp i32 [[TMP4]] to double +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP6:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 
[[TMP6]] -// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK9-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A1]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK9-NEXT: [[TMP13:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK9-NEXT: ret void // // @@ -3576,47 +3804,54 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: ret void // @@ -3627,7 +3862,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -3635,22 +3870,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3658,29 +3898,34 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], 
align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 +// CHECK11-NEXT: 
[[TMP3:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV1]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3689,39 +3934,45 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 // CHECK11-NEXT: ret void // // @@ -3737,7 +3988,7 @@ // CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3755,82 +4006,95 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store 
ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP9]] to double -// CHECK11-NEXT: [[ADD5:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], 
ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double +// CHECK11-NEXT: [[ADD1:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD1]] to float +// CHECK11-NEXT: store float [[CONV2]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fpext float [[TMP21]] to double +// CHECK11-NEXT: [[ADD5:%.*]] = fadd double 
[[CONV4]], 1.000000e+00 // CHECK11-NEXT: [[CONV6:%.*]] = fptrunc double [[ADD5]] to float -// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[CONV8:%.*]] = fpext float [[TMP10]] to double -// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[CONV8]], 1.000000e+00 -// CHECK11-NEXT: [[CONV10:%.*]] = fptrunc double [[ADD9]] to float -// CHECK11-NEXT: store float [[CONV10]], ptr [[ARRAYIDX7]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX11]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load double, ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[ADD13:%.*]] = fadd double [[TMP11]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD13]], ptr [[ARRAYIDX12]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP12]] -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX14]], i32 3 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8 -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[X]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP14]], 1 -// CHECK11-NEXT: store i64 [[ADD17]], ptr [[X]], align 4 -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i8, ptr [[Y]], align 4 -// CHECK11-NEXT: [[CONV18:%.*]] = sext i8 [[TMP15]] to i32 -// 
CHECK11-NEXT: [[ADD19:%.*]] = add nsw i32 [[CONV18]], 1 -// CHECK11-NEXT: [[CONV20:%.*]] = trunc i32 [[ADD19]] to i8 -// CHECK11-NEXT: store i8 [[CONV20]], ptr [[Y]], align 4 +// CHECK11-NEXT: store float [[CONV6]], ptr [[ARRAYIDX3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX7]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD9]], ptr [[ARRAYIDX8]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP23]] +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP24]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8 +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP25]], 1 +// CHECK11-NEXT: store i64 [[ADD13]], ptr [[X]], align 4 +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4 +// CHECK11-NEXT: [[CONV14:%.*]] = sext i8 [[TMP26]] to i32 +// CHECK11-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 +// CHECK11-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i8 +// CHECK11-NEXT: store i8 [[CONV16]], ptr [[Y]], align 4 // CHECK11-NEXT: ret void // // @@ -3838,41 +4102,49 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, 
align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i32 [[TMP1]]) +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 // CHECK11-NEXT: ret void // // @@ -3880,38 +4152,47 @@ // CHECK11-SAME: (i32 noundef [[NN:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[NN:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[NN_ADDR]]) +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[NN]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[NN:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[NN_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[NN]], ptr [[NN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[NN_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -3919,23 +4200,28 @@ // CHECK11-SAME: (i32 noundef [[VLA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, i32 [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[VLA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[F:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 // CHECK11-NEXT: ret void // // @@ -3946,59 +4232,67 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca 
i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP5]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..9, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[TMP5]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 2 +// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP3]] to i32 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV3:%.*]] = sext i8 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 -// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i8 [[CONV5]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP8]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // @@ -4011,7 +4305,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4021,48 +4315,57 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined..10, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// 
CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load double, ptr [[A3]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 -// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP6]] -// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 +// CHECK11-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = load double, ptr [[A1]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP12]], 1.000000e+00 +// CHECK11-NEXT: store double [[INC]], ptr [[A1]], align 4 +// CHECK11-NEXT: [[CONV2:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP13:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP13]] +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[ARRAYIDX3]], align 2 // CHECK11-NEXT: ret void // // @@ -4072,47 +4375,54 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @.omp_outlined..11, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, 
ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i16 [[CONV2]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -314,7 +314,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -383,13 +383,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 
0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -630,7 +630,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -638,19 +638,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -660,50 +661,54 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -782,8 +787,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !21 @@ -826,21 +831,22 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -850,53 +856,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -904,21 +914,22 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -928,55 +939,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: 
store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -985,27 +1000,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1015,59 +1030,65 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // @@ -1084,8 +1105,7 @@ // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1104,31 +1124,40 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1138,121 +1167,127 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 9 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK1-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] 
to double +// CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD20:%.*]] = 
add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK1-NEXT: store i64 [[ADD16]], ptr 
[[X]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK1-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK1-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK1-NEXT: ret void // // @@ -1614,7 +1649,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1624,23 +1659,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1650,72 +1690,76 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8 -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8 -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], ptr [[A]], align 8 +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8 +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// 
CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -1727,42 +1771,41 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 
2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1776,98 +1819,108 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// 
CHECK1-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -1879,30 +1932,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -1912,65 +1966,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -2001,7 +2061,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -2068,13 +2128,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], 
align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2317,7 +2377,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2325,19 +2385,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2347,50 +2408,54 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2469,8 +2534,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !22 @@ -2513,21 +2578,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2537,53 +2603,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2591,21 +2661,22 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2615,55 +2686,59 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: 
store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -2672,27 +2747,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2702,59 +2777,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // @@ -2771,8 +2852,7 @@ // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2791,31 +2871,40 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// 
CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2825,121 +2914,127 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, 
i32 9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK3-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] 
to double +// CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD20:%.*]] = 
add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK3-NEXT: store i64 [[ADD16]], ptr 
[[X]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK3-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK3-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
+// CHECK3-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK3-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK3-NEXT: ret void // // @@ -3301,7 +3396,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3311,23 +3406,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3337,72 +3437,76 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
-// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4 -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4 +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// 
CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -3414,42 +3518,41 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 
2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3463,98 +3566,108 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 
-// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// 
CHECK3-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -3566,30 +3679,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -3599,65 +3713,71 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK3-NEXT: ret void // // @@ -3674,7 +3794,7 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3682,19 +3802,20 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3704,50 +3825,54 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3755,21 +3880,22 @@ // CHECK9-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3779,55 +3905,59 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr 
[[TMP1]], align 2 +// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK9-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 
[[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -3836,27 +3966,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3866,59 +3996,65 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: 
store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 
4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -3935,8 +4071,7 @@ // CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3955,31 +4090,40 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK9-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3989,121 +4133,127 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// 
CHECK9-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK9-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK9-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK9-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK9-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// 
CHECK9-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK9-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK9-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] -// CHECK9-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK9-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] +// 
CHECK9-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP36:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK9-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP36]] +// CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i64 3 +// CHECK9-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK9-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK9-NEXT: store i64 [[ADD16]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP39]] to i32 +// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK9-NEXT: store i8 [[CONV19]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: 
omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK9-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK9-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK9-NEXT: ret void // // @@ -4115,42 +4265,41 @@ // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK9-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK9-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4164,98 +4313,108 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK9-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK9-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK9-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK9-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV12:%.*]] = 
trunc i32 [[ADD11]] to i16 -// CHECK9-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK9-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK9-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK9-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK9-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK9-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK9-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP26]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -4269,7 +4428,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4279,23 +4438,28 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4305,72 +4469,76 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 
4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK9-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: store double [[ADD4]], ptr [[A]], align 8 -// CHECK9-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8 -// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK9-NEXT: store double [[INC]], ptr [[A5]], align 8 -// CHECK9-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK9-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK9-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK9-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: store double [[ADD2]], ptr [[A]], align 8 +// CHECK9-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8 +// CHECK9-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK9-NEXT: store double [[INC]], ptr [[A3]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// 
CHECK9-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4380,30 +4548,31 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], 
align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4413,65 +4582,71 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// 
CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// 
CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // @@ -4481,7 +4656,7 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK11-NEXT: 
[[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -4489,19 +4664,20 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4511,50 +4687,54 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4562,21 +4742,22 @@ // CHECK11-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4586,55 +4767,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK11-NEXT: store i16 
[[TMP2]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // 
CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -4643,27 +4828,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4673,59 +4858,65 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], 
align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 // CHECK11-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4742,8 +4933,7 @@ // CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4762,31 +4952,40 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK11-NEXT: 
store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4796,121 +4995,127 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 
0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 9 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP23]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP21]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP24]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double -// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK11-NEXT: store i32 [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP33]] to double +// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK11-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD4]] to float +// CHECK11-NEXT: store float [[CONV5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK11-NEXT: [[TMP34:%.*]] = load float, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV7:%.*]] = fpext float [[TMP34]] to double +// CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 
-// CHECK11-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double -// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 -// CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] -// CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] -// 
CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store float [[CONV9]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP35:%.*]] = load double, ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[TMP35]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD12]], ptr [[ARRAYIDX11]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP36:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP36]] +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX13]], i32 3 +// CHECK11-NEXT: [[TMP37:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD15:%.*]] = fadd double [[TMP37]], 1.000000e+00 +// CHECK11-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], 1 +// CHECK11-NEXT: store i64 [[ADD16]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP39:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CONV17:%.*]] = sext i8 
[[TMP39]] to i32 +// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 +// CHECK11-NEXT: store i8 [[CONV19]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP40]], 1 +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK11-NEXT: store i32 [[ADD26]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP43]], [[TMP44]] +// CHECK11-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) // CHECK11-NEXT: ret void // // @@ -4922,42 +5127,41 @@ // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK11-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK11-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4971,98 +5175,108 @@ // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK11-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK11-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK11-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK11-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD10]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK11-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK11-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK11-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK11-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK11-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK11-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -5076,7 +5290,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -5086,23 +5300,28 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5112,72 +5331,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK11-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: store double [[ADD4]], ptr [[A]], align 4 -// CHECK11-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK11-NEXT: store double [[INC]], ptr [[A5]], align 4 -// CHECK11-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK11-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK11-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: store double [[ADD2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// 
CHECK11-NEXT: store double [[INC]], ptr [[A3]], align 4 +// CHECK11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK11-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5187,30 +5410,31 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5220,64 +5444,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: 
cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD2]], 
ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -116,34 +116,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// 
CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -151,26 +151,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 
0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -182,68 +185,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: 
[[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -276,34 +281,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr 
[[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -311,26 +316,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -342,66 +350,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 
[[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -430,10 +440,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = 
alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -458,91 +468,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = 
getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -553,8 +563,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -563,157 +572,167 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 
92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 
[[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK9-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// 
CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -732,34 +751,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr 
[[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: 
store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -771,18 +790,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] 
{ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -794,67 +816,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] 
// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -910,91 +934,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 
@.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], 
ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 
4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: 
[[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], 
ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -1005,8 +1029,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1015,155 +1038,165 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], 
align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// 
CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] 
= mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], 
align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1182,34 +1215,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, 
ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1221,18 +1254,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) 
#[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1244,65 +1280,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: 
[[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -263,18 +263,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -284,56 +287,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ 
[[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -341,18 +346,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -362,56 +370,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP4]]) // CHECK1-NEXT: ret void // // @@ -419,18 +429,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -440,73 +453,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -658,18 +673,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -679,55 +697,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -735,18 +755,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca 
ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -756,55 +779,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -812,18 +837,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -833,72 +861,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1172,27 +1202,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1205,75 +1238,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1285,27 +1322,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1318,75 +1358,79 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] 
= sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1399,33 +1443,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // 
CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1438,94 +1484,100 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], 
label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1658,18 +1710,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1679,55 +1734,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store 
i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1735,18 +1792,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1756,55 +1816,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1812,18 +1874,21 @@ // 
CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1833,72 +1898,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -2174,27 +2241,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2207,74 +2277,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr 
[[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2286,27 +2360,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: 
store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2319,74 +2396,78 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 
0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2399,33 +2480,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2438,93 +2521,99 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // 
CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: 
omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2657,18 +2746,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 
// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2678,54 +2770,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // 
CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2733,18 +2827,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2754,54 +2851,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2809,18 +2908,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 
dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2830,71 +2932,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi 
i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -339,8 +339,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -349,138 +348,148 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: 
store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// 
CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -687,7 +696,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -696,23 
+705,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -720,112 +733,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] 
= alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// 
CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1124,8 +1141,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1134,136 +1150,146 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1470,7 +1496,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1479,23 +1505,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 
4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1503,110 +1533,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: 
[[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void 
@_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x 
%struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1835,35 +1869,33 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1875,65 +1907,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -160,41 +160,39 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP0]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: store double [[TMP3]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP7]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -211,84 +209,94 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 
0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] 
= add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP23]], align 8 // 
CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP18:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP18]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[TMP19]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: 
store float [[TMP22]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -327,8 +335,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -336,26 +343,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -363,95 +373,101 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], 
align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr 
[[SFVAR6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP20]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 4 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP31]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -609,8 +625,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // 
CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -620,27 +635,31 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -649,28 +668,34 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, 
align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], 
align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -680,99 +705,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], 
i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP22]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S]], ptr 
[[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -948,7 +973,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -957,23 +982,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -982,26 +1011,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, 
align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], 
i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1011,97 +1044,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: 
cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr 
[[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1301,8 +1334,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1312,27 +1344,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1341,28 +1377,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: 
[[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 
+// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1372,97 +1414,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 
4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP22]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[ARRAY_BEGIN11]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1638,7 +1680,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1647,23 +1689,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1672,26 +1718,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: 
[[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1701,95 +1751,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // 
CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP14]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 
[[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP23]], i32 4, i1 false) 
+// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -122,7 +122,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -132,20 +132,22 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -155,90 +157,106 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr 
[[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -251,72 +269,80 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 
[[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -324,21 +350,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -352,27 +378,30 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -382,93 +411,110 @@ // 
CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// 
CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 
noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -481,85 +527,93 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // 
CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -746,7 +800,7 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -756,20 +810,22 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -779,90 +835,106 @@ // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr 
[[N_CASTED]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK2-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK2-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -875,72 +947,80 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 
[[IDXPROM]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK2: .cancel.exit: // CHECK2-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK2: .cancel.continue: @@ -948,21 +1028,21 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: cancel.exit: -// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK2-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK2-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK2: omp.precond.end: // CHECK2-NEXT: br label [[CANCEL_CONT]] @@ -976,27 +1056,30 @@ // CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1006,93 +1089,110 @@ 
// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK2-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// 
CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK2-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 
noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1105,85 +1205,93 @@ // CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK2-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK2-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK2-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[TMP16]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // 
CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], 
align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void @@ -1377,7 +1485,7 @@ // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -1387,20 +1495,22 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i32 [[TMP5]], ptr [[TMP1]]) +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1410,88 +1520,104 @@ // CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // 
CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, 
align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1504,69 +1630,77 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]] +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], 
i32 0, i32 [[TMP25]] // CHECK4-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK4-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK4-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: // CHECK4-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK4: .cancel.continue: @@ -1574,21 +1708,21 @@ // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK4-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK4-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP32]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: cancel.exit: -// CHECK4-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK4-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK4-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK4: omp.precond.end: // CHECK4-NEXT: br label [[CANCEL_CONT]] @@ -1602,27 +1736,30 @@ // CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1632,91 +1769,108 @@ 
// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK4-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK4-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1729,82 +1883,90 @@ // CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK4-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK4: omp.precond.then: // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK4-NEXT: br label [[COND_END:%.*]] // CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK4-NEXT: br label [[COND_END]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK4-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK4-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 
+// CHECK4-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void @@ -1824,7 +1986,7 @@ // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -1834,20 +1996,22 @@ // CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i64 [[TMP5]], ptr [[TMP1]]) +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1857,90 +2021,106 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
-// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK10-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: 
[[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]]) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK10-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK10-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store ptr [[TMP4]], ptr [[TMP28]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1953,72 +2133,80 @@ // CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[I4]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK10-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK10-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK10-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK10-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK10-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: // CHECK10-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK10: .cancel.continue: @@ -2026,21 +2214,21 @@ // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK10-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: 
omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: cancel.exit: -// CHECK10-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK10-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK10-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK10: omp.precond.end: // CHECK10-NEXT: br label [[CANCEL_CONT]] @@ -2054,27 +2242,30 @@ // CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK10-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: ret void // // // CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 @@ -2084,93 +2275,110 @@ // CHECK10-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP6]], 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK10-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]) +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK10-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK10-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// 
CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK10-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK10-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void // // // 
CHECK10-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK10-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: // CHECK10-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK10-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2183,85 +2391,93 @@ // CHECK10-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK10-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// 
CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK10-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK10-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK10-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK10-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK10-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK10: omp.precond.then: // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK10-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// CHECK10-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK10-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK10-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK10-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK10-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK10: cond.true: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK10-NEXT: br label [[COND_END:%.*]] // CHECK10: cond.false: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK10-NEXT: br label [[COND_END]] // CHECK10: cond.end: -// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK10-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK10-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK10-NEXT: [[CMP6:%.*]] = 
icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK10-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK10-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: -// CHECK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK10-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK10-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void @@ -2274,7 +2490,7 @@ // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2284,20 +2500,22 @@ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// 
CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., i32 [[TMP5]], ptr [[TMP1]]) +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2307,88 +2525,104 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// 
CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 
[[TMP11]], [[TMP12]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: 
store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]]) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK12-NEXT: store ptr [[TMP4]], ptr [[TMP26]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK12-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2401,69 +2635,77 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 
-// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, 
i32 [[TMP17]] +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK12-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP19]], i32 2) -// CHECK12-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK12-NEXT: br i1 [[TMP21]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK12-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK12: .cancel.exit: // CHECK12-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK12: .cancel.continue: @@ -2471,21 +2713,21 @@ // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK12-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// 
CHECK12-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: cancel.exit: -// CHECK12-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK12-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK12-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK12: omp.precond.end: // CHECK12-NEXT: br label [[CANCEL_CONT]] @@ -2499,27 +2741,30 @@ // CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 
@@ -2529,91 +2774,108 @@ // CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK12-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// 
CHECK12-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK12-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2626,82 +2888,90 @@ // CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK12-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, 
[[TMP12]] // CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK12: omp.precond.then: // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK12-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK12-NEXT: br label [[COND_END:%.*]] // CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK12-NEXT: br label [[COND_END]] // CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK12-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: 
omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK12-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK12-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP21:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -121,34 +121,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// 
CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: 
[[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,26 +156,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -183,66 +186,81 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -254,76 +272,82 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: 
store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -356,34 +380,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr 
[[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// 
CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -391,26 +415,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] 
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -418,64 +445,79 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 
// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -487,72 +529,78 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -581,10 +629,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -609,91 +657,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store 
i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: 
[[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: 
[[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: 
[[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: 
ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -704,8 +752,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -714,283 +761,316 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 -// 
CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store 
i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// 
CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load 
i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP37]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: 
store ptr [[TMP10]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 
[[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] 
// CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load 
i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// 
CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 
[[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1009,34 +1089,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 
8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1048,18 +1128,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1067,66 +1150,81 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca 
i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1138,75 +1236,81 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 
[[ADD7]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1262,91 +1366,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, 
i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // 
CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: 
[[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], 
ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -1357,8 +1461,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1367,285 +1470,318 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 
[[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: 
[[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP40]], align 
4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP41]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) 
[[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, 
align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 
+// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// 
CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 
[[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// 
CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: 
[[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: 
store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1664,34 +1800,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], 
ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1703,18 +1839,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1722,64 +1861,79 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: 
cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1791,71 +1945,77 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // 
CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -275,83 +275,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -361,64 +379,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: 
omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -426,83 +450,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -512,64 +554,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -577,103 +625,121 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// 
CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -683,64 +749,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext 
i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -892,81 +964,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -976,61 +1066,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1038,81 +1134,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1122,61 +1236,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1184,101 +1304,119 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] 
// CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1288,61 +1426,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: 
store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], 
[[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1618,27 +1762,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1648,93 +1795,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // 
CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// 
CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], 
i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1747,83 +1911,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1835,27 +2007,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1865,93 +2040,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1964,83 +2156,91 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2053,33 +2253,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2089,124 +2291,142 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = 
load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2219,84 +2439,94 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr 
[[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add 
nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2444,83 +2674,101 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = 
alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2530,63 +2778,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2594,83 +2848,101 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = 
alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2680,63 +2952,69 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2745,116 +3023,134 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // 
CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2864,64 +3160,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 
4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3199,27 +3503,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// 
CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3229,91 +3536,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3326,80 +3650,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3411,27 +3743,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3441,91 +3776,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br 
label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3538,80 +3890,88 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3624,33 +3984,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3660,122 +4022,140 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP27]], 
align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw 
i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 
[[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3788,81 +4168,91 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -4010,81 +4400,99 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 
4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4094,60 +4502,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4155,81 +4569,99 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4239,60 +4671,66 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4301,114 +4739,132 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], 
label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4418,61 +4874,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 
0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -400,8 +400,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr 
[[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -410,133 +409,158 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca 
[2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 
[[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 
0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -574,137 +598,145 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] 
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ 
[[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], 
align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) 
#[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -712,10 +744,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -726,9 +758,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: 
[[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -790,12 +822,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -878,7 +910,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -887,23 +919,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -911,107 +947,127 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: 
arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1029,16 +1085,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1046,120 +1100,128 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store 
i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr 
[[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: 
omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// 
CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull 
align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1179,7 +1241,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1193,7 +1255,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1209,9 +1271,9 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr 
inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1445,8 +1507,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1455,131 +1516,156 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: 
omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // 
CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1617,133 +1703,141 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1751,10 +1845,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1765,9 +1859,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1829,12 +1923,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // 
CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1917,7 +2011,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1926,23 +2020,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1950,105 +2048,125 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2066,16 +2184,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2083,116 +2199,124 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2212,7 +2336,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2226,7 +2350,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2242,9 +2366,9 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds 
[[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2408,35 +2532,33 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2444,82 +2566,98 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2528,76 +2666,88 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds 
[[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK5-NEXT: ret void // // @@ -2625,8 +2775,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -2635,133 +2784,158 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr 
[[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// 
CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done3: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr 
[[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK13-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -2799,137 +2973,145 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // 
CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // 
CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK13-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: 
omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done13: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -2951,7 +3133,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -2960,23 +3142,27 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2984,107 +3170,127 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca 
[[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 
+// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK13-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8 +// 
CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done10: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -3102,16 +3308,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3119,120 +3323,128 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[ARRAY_BEGIN]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done5: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void 
@_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// 
CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: 
-// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK13-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done14: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // @@ -3314,9 +3526,9 @@ // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -3333,8 +3545,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // 
CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -3343,131 +3554,156 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr 
[[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], 
align 4 +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -3505,133 +3741,141 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] 
+// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi 
i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 
4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -3653,7 +3897,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -3662,23 +3906,27 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3686,105 +3934,125 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca 
[[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 
+// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// 
CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK15-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: 
arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // @@ -3802,16 +4070,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3819,116 +4085,124 @@ // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: 
omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK15-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add 
nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] 
-// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // @@ -4010,9 +4284,9 @@ // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4028,35 +4302,33 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK17-NEXT: 
store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4064,82 +4336,98 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr 
[[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef 
[[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4151,72 +4439,84 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] 
to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, 
ptr [[TMP]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -168,77 +168,93 
@@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -248,139 +264,161 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l51 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 
8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -390,43 +428,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -434,14 +478,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -499,37 +543,37 @@ // CHECK1: omp_if.then: // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP22]] to i1 -// CHECK1-NEXT: [[TMP23:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK1-NEXT: [[TMP21:%.*]] = select i1 [[TOBOOL4]], i32 0, i32 1 // CHECK1-NEXT: 
[[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP25]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP23]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) -// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// 
CHECK1-NEXT: br i1 [[TMP34]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP21]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90.region_id, ptr [[KERNEL_ARGS6]]) +// CHECK1-NEXT: [[TMP32:%.*]] = icmp 
ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]] // CHECK1: omp_offload.failed7: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]] @@ -539,85 +583,101 @@ // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l90(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP35]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP33]]) // CHECK1-NEXT: ret i32 [[CALL]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -627,43 +687,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -671,96 +737,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 
8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // 
CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -770,43 +852,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -814,14 +902,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -829,100 +917,119 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -932,43 +1039,49 @@ // 
CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -976,14 +1089,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1037,37 +1150,37 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP13]], ptr 
[[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP21]] to i1 -// CHECK1-NEXT: [[TMP22:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1 +// CHECK1-NEXT: [[TMP20:%.*]] = select i1 [[TOBOOL3]], i32 0, i32 1 // CHECK1-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP23]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP24]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP25]], align 8 -// 
CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP22]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) -// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 -// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP21]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK1-NEXT: store 
ptr [[TMP17]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP20]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] // CHECK1: omp_offload.failed6: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l68(i64 [[TMP13]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT7]] @@ -1078,77 +1191,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l60 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 
4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1158,43 +1287,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1202,96 +1337,112 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l64 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], 
align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1301,43 +1452,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1345,14 +1502,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1360,100 +1517,119 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: 
omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1463,43 
+1639,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1507,14 +1689,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -195,41 +195,39 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = 
alloca i64, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -237,110 +235,128 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, ptr [[TMP12]], align 8 -// CHECK1-NEXT: store double [[TMP13]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP17]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store double [[TMP19]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, ptr [[TMP26]], align 8 +// CHECK1-NEXT: store double [[TMP27]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store double [[TMP33]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr 
[[TMP26]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP29]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP38]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[TMP39]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP42]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -354,94 +370,108 @@ // CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 // CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 -// 
CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp 
ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR7]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -480,8 +510,7 @@ // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 
-// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 @@ -489,26 +518,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -516,102 +548,120 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP15]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP10]], i32 [[TMP11]], ptr [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], ptr [[G2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = 
load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP34]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store volatile double [[TMP36]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP37]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP38]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -619,100 +669,110 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], 
align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 
[[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], 
ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -870,8 +930,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -881,27 +940,31 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -909,31 +972,38 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -943,111 +1013,123 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]], i64 [[TMP19]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8 +// 
CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP41]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP42]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ 
[[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP29]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP43]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP44]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// 
CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1056,36 +1138,46 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 
4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1095,99 +1187,99 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr 
[[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// 
CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label 
[[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: 
arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1205,10 +1297,10 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1219,9 +1311,9 @@ // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // 
CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1283,12 +1375,12 @@ // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1363,7 +1455,7 @@ // CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr 
[[S_ARR_ADDR]], align 8 @@ -1372,23 +1464,27 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1396,136 +1492,154 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store 
ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8 +// CHECK5-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr 
align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP25]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP39]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr 
[[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca 
ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1534,134 +1648,142 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 
-// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 
-// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// 
CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// 
CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP33]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP34]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // 
CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: 
omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq 
ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // @@ -1681,7 +1803,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1694,7 +1816,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void @@ -1861,8 +1983,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1872,27 +1993,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 
4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1900,31 +2025,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: 
[[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// 
CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -1934,109 +2066,121 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP16]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]], i32 [[TMP17]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4 +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// 
CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, 
ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP39]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP40]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP27]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP41]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP42:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP42]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: 
[[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: 
entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2045,34 +2189,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// 
CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2082,97 +2236,97 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call 
void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store 
i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 
[[TMP16]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 
+// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2190,10 +2344,10 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca 
[[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK7-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2204,9 +2358,9 @@ // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK7-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2268,12 +2422,12 @@ // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2348,7 +2502,7 @@ // CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2357,23 +2511,27 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2381,134 +2539,152 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store 
ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: 
[[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]]) +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP22]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 
[[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP23]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // 
CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2517,130 +2693,138 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 
[[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP24:%.*]] 
= load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP30]]) +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP33]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP34]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// 
CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // @@ -2660,7 +2844,7 @@ // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, 
ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, ptr [[F]], align 4 // CHECK7-NEXT: ret void // @@ -2673,7 +2857,7 @@ // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -57,77 +57,93 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l16 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -137,57 +153,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -332,29 +332,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -370,55 +377,63 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], 
label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -428,12 +443,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -448,14 +464,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -471,68 +493,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 
[[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = 
getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -545,19 +567,19 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr 
[[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -588,12 +610,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -629,15 +651,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -645,81 +669,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] 
= icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -728,12 +765,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -743,97 +781,103 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -857,7 +901,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -871,7 +915,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1056,29 +1100,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1094,53 +1145,61 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1150,12 +1209,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1170,14 +1230,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1191,66 +1257,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label 
[[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: 
[[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// 
CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1263,19 +1329,19 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1306,12 +1372,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1347,15 +1413,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1363,79 +1431,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 
1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1444,12 +1525,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1459,93 +1541,99 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], 
ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) 
#[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1569,7 +1657,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1583,7 +1671,7 @@ // CHECK3-NEXT: store ptr 
[[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1742,15 +1830,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1758,68 +1848,82 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr 
[[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1832,74 +1936,80 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 // CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // // @@ -1922,29 +2032,36 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) 
+// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1960,55 +2077,63 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 
[ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2028,12 +2153,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2048,14 +2174,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2071,68 +2203,68 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// 
CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -2154,15 +2286,17 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2170,81 +2304,94 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// 
CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: 
call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br 
label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2263,12 +2410,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2278,97 +2426,103 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 
+// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2414,7 +2568,7 @@ // CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK13-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK13-NEXT: ret void @@ -2432,29 +2586,36 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // 
CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2470,53 +2631,61 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2536,12 +2705,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2556,14 +2726,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -2577,66 +2753,66 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 
[[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -2658,15 +2834,17 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -2674,79 +2852,92 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2765,12 +2956,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -2780,93 +2972,99 @@ // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 
4, i1 false) // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2912,7 +3110,7 @@ // CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK15-NEXT: ret void @@ -2930,15 +3128,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2946,68 +3146,82 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: 
store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: 
omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3023,70 +3237,76 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 
// CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK17-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 
8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK17-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -114,78 +114,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () 
#[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -195,135 +211,157 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -333,57 +371,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -423,78 +467,94 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -504,57 +564,63 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // 
CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp @@ -101,34 +101,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store 
i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store 
i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -141,191 +141,215 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // 
CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] 
= load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: 
ret void @@ -339,15 +363,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -359,15 +383,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// 
CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -382,36 +406,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -423,191 +447,215 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: 
i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -621,15 
+669,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -641,15 +689,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -671,34 +719,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], 
align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call 
i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -711,187 +759,211 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: 
[[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull 
align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 
0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], 
i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load 
i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -905,15 +977,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -925,15 +997,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -948,36 +1020,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -989,187 +1061,211 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: 
i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1183,15 +1279,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr 
[[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1203,15 +1299,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], 
i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1236,195 +1332,219 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 
// CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP25]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1438,15 +1558,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store 
i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1458,15 +1578,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -52,259 +52,280 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // 
CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// 
CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr 
getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void 
@llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void 
@llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr 
[[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // 
CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP62]], i64 [[TMP63]], ptr [[ARGC1]], ptr [[TMP64]]) +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP66]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP69]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP70]], [[TMP71]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP68]]) -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP70]], i32 1) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr 
[[TMP76]], align 4 -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP73]]) +// CHECK1-NEXT: [[TMP74:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = load i32, ptr [[TMP74]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP75]], i32 1) +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP77]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP79:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP81]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP82]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: 
.omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP84:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP83]], [[TMP84]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP85:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP86:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP86]] to i32 +// CHECK1-NEXT: [[TMP87:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP87]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP85]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP81]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: 
[[TMP87:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP89:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP88]] monotonic, align 4 +// CHECK1-NEXT: [[TMP90:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: 
br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP92:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP97:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP92]], ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP93]] to i32 +// CHECK1-NEXT: [[TMP94:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP94]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP95:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP96:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP92]], i8 [[TMP95]] monotonic monotonic, align 1 +// 
CHECK1-NEXT: [[TMP97]] = extractvalue { i8, i1 } [[TMP96]], 0 +// CHECK1-NEXT: [[TMP98:%.*]] = extractvalue { i8, i1 } [[TMP96]], 1 +// CHECK1-NEXT: br i1 [[TMP98]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP90]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP96]]) +// CHECK1-NEXT: [[TMP99:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP99]]) // CHECK1-NEXT: ret void // // @@ -315,8 +336,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 
4 // CHECK1-NEXT: ret void // // @@ -327,12 +348,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -392,283 +413,288 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = 
icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (ptr getelementptr 
(i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP56]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr 
[[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// 
CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 // CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, 
[[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr 
[[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP81]], ptr [[TMP71]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: [[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 
+// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr // CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 // 
CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] 
// CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], 
[[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr 
[[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 // CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: 
[[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 [[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // 
CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -679,8 +705,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -691,12 +717,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -791,59 +817,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// 
CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr 
@__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load 
ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr 
i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -855,38 +881,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], 
ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void @@ -900,38 +926,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 
0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // 
CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -413,83 +413,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -499,64 +517,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // 
CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -564,83 +588,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -650,64 +692,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -715,83 +763,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -801,85 +867,91 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: 
[[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -887,83 +959,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -973,55 +1063,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1036,83 +1132,101 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1122,55 +1236,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1407,81 +1527,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1491,61 +1629,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1553,81 +1697,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1637,61 +1799,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1699,81 +1867,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1783,80 +1969,86 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1864,81 +2056,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1948,52 +2158,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: 
omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2008,81 +2224,99 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2092,52 +2326,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: 
omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP14]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2374,83 +2614,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2460,64 +2718,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // 
CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2525,83 +2789,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2611,64 +2893,70 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// 
CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2676,83 +2964,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2762,85 +3068,91 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: 
[[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = 
load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2848,83 +3160,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2934,55 +3264,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // 
CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -2997,83 +3333,101 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3083,55 +3437,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// 
CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] 
to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3368,81 +3728,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3452,61 +3830,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3514,81 +3898,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3598,61 +4000,67 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// 
CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3660,81 +4068,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 
+// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3744,80 +4170,86 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// 
CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3825,81 +4257,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3909,52 +4359,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: 
omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -3969,81 +4425,99 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4053,52 +4527,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: 
omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP14]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4538,27 +5018,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4568,93 +5051,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, 
align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 
4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4667,83 +5167,91 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: 
store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4755,27 +5263,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4785,93 +5296,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = 
alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// 
CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4884,83 +5412,91 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 
-// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 
4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4973,33 +5509,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5009,124 +5547,142 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 
8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // 
CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5139,84 +5695,94 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] 
to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5228,27 +5794,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, 
ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 @@ -5258,93 +5827,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // 
CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5357,72 +5943,80 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 
8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 
// CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK13-NEXT: 
call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 
[[TMP20]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5442,33 +6036,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5478,99 +6074,117 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK13-NEXT: [[TMP38:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5583,74 +6197,84 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr 
[[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 
[[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 
[[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5894,83 +6518,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5980,63 +6622,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6044,83 +6692,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6130,63 +6796,69 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6195,95 +6867,113 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6293,86 +6983,94 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -6380,83 +7078,101 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // 
CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: 
cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6466,54 +7182,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: 
[[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6529,95 +7251,113 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6627,56 +7367,64 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7120,27 +7868,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 
+// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7150,91 +7901,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7247,80 +8015,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7332,27 +8108,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7362,91 +8141,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br 
label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7459,80 +8255,88 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7545,33 +8349,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7581,122 +8387,140 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP28]], ptr [[TMP27]], 
align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw 
i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 
[[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK15-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// 
CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7709,81 +8533,91 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7795,27 +8629,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7825,91 +8662,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br 
label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7922,69 +8776,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP16]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8004,33 +8866,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 
[[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8040,97 +8904,115 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK15-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: 
omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8143,71 +9025,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 
[[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// 
CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group 
[[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8451,81 +9343,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8535,60 +9445,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8596,81 +9512,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8680,60 +9614,66 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8742,93 +9682,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: 
cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8838,81 +9796,89 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 
4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: 
-// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -8920,81 +9886,99 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9004,51 +9988,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9064,93 +10054,111 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// 
CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9160,53 +10168,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: 
omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9646,27 +10662,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
-// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9676,93 +10695,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9775,83 +10811,91 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: 
ret void @@ -9863,27 +10907,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9893,93 +10940,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr 
[[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br 
label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9992,83 +11056,91 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ 
-10081,33 +11153,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10117,124 +11191,142 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 
8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // 
CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10247,84 +11339,94 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] 
to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10336,27 +11438,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load 
i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 @@ -10366,93 +11471,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // 
CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10465,72 +11587,80 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// 
CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[ADD6:%.*]] 
= add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10550,33 +11680,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10586,99 +11718,117 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8 +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8 +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK17-NEXT: [[TMP38:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10691,74 +11841,84 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr 
[[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, 
ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp 
ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: 
store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11002,83 +12162,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11088,63 +12266,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// 
CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] 
], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11152,83 +12336,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11238,63 +12440,69 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// 
CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] 
], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11303,95 +12511,113 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11401,86 +12627,94 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -11488,83 +12722,101 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // 
CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: 
cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11574,54 +12826,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// 
CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], 
label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11637,95 +12895,113 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11735,56 +13011,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], 
align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12228,27 +13512,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12258,91 +13545,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12355,80 +13659,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12440,27 +13752,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12470,91 +13785,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br 
label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12567,80 +13899,88 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12653,33 +13993,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: 
[[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12689,122 +14031,140 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[TMP27]], 
align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw 
i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 
[[TMP32]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// 
CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12817,81 +14177,91 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12903,27 +14273,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12933,91 +14306,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br 
label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // 
CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13030,69 +14420,77 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13112,33 +14510,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // 
CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13148,97 +14548,115 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: 
omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13251,71 +14669,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 
[[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 
[[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13559,81 +14987,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13643,60 +15089,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -13704,81 +15156,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13788,60 +15258,66 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -13850,93 +15326,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13946,81 +15440,89 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 
4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: 
omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -14028,81 +15530,99 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14112,51 +15632,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] 
// CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14172,93 +15698,111 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// 
CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14268,53 +15812,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], 
align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// 
CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -263,8 +263,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -275,24 +274,26 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i64 [[TMP5]], i64 [[TMP7]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -302,93 +303,110 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 
[[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // 
CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt 
i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] 
to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[I_CASTED]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: 
store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -397,15 +415,15 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = 
alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -421,106 +439,116 @@ // CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], 
align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 
[[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK1-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// 
CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -535,27 +563,30 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -565,84 +596,102 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: 
store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group 
[[ACC_GRP11]] -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -655,15 +704,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 
8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -676,91 +724,99 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // 
CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -972,8 +1028,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -984,24 +1039,26 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP5]], i32 [[TMP7]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1011,91 +1068,108 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x 
i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store 
i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1104,15 +1178,15 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1128,103 +1202,113 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 
[[TMP28]], [[TMP29]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 [[TMP31]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK3-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[I_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK3-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1239,27 +1323,30 @@ // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1269,82 +1356,100 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// 
CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label 
[[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// 
CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: 
[[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1357,15 +1462,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1378,88 +1482,96 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], ptr 
[[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr 
[[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -1788,8 +1900,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 @@ -1800,24 +1911,26 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], 
ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i64 [[TMP5]], i64 [[TMP7]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1827,93 +1940,110 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x 
i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load 
i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// 
CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK9-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub 
nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1922,15 +2052,15 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[I:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 @@ -1946,106 +2076,116 @@ // CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 
[[TMP8]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 8) ] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// 
CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: 
[[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], 
align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[I_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK9-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // 
CHECK9: .omp.linear.pu.done: @@ -2060,27 +2200,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i64 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2090,84 +2233,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label 
[[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group 
[[ACC_GRP12]] -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], ptr [[TMP20]]), !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, 
i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP5]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2180,15 +2341,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, 
align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2201,91 +2361,99 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP14]] to i32 +// 
CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP15]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // 
CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 8, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 0 -// CHECK9-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2305,8 +2473,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 @@ -2317,24 +2484,26 @@ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined., i32 [[TMP5]], i32 [[TMP7]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2344,91 +2513,108 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// 
CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..1, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], i32 [[TMP19]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2437,15 +2623,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[I:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2461,103 
+2647,113 @@ // CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 
[[TMP8]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 8) ] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTLINEAR_START]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTLINEAR_STEP]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTLINEAR_STEP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB2:[0-9]+]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP22]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP25]], [[COND_TRUE]] ], [ [[TMP26]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP10]], i32 0, i32 [[TMP31]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK11-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[I_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK11-NEXT: br i1 [[TMP39]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -2572,27 +2768,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = 
alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..2, i32 [[TMP2]], ptr [[TMP0]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2602,82 +2801,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group 
[[ACC_GRP13]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 5, ptr @.omp_outlined..3, i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], ptr [[TMP0]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP13]] 
+// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = 
load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2690,15 +2907,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[G:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2711,88 +2927,96 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP12]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] 
= phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP20]], [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP8]], i32 0, i32 [[TMP28]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP34]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -121,34 +121,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// 
CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,26 +156,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -183,58 +186,73 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -244,13 +262,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -262,79 +280,85 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// 
CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = 
icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -372,34 +396,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -407,26 +431,29 @@ // 
CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -434,56 +461,71 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -493,13 +535,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -511,75 +553,81 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -622,39 +670,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// 
CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -693,37 +741,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -753,10 +801,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -781,91 +829,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: 
[[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: 
[[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// 
CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: 
store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -876,8 +924,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -886,145 +933,170 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 -// 
CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store 
i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// 
CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: 
[[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[M_CASTED]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[M_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[M]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP37]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP38]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP39]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP40]], [[TMP41]] +// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP43]]) +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK9-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK9-NEXT: [[MUL25:%.*]] = mul nsw i32 [[DIV24]], 1 -// CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK9-NEXT: store i32 [[ADD26]], ptr [[J14]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw 
i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1033,170 +1105,178 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// 
CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP10]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, 
!llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw 
i64 [[TMP23]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store 
i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 // CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 // CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 // CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4 +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1217,34 +1297,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// 
CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], 
align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, 
ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1256,18 +1336,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1275,58 +1358,73 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1336,13 +1434,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1354,78 +1452,84 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] 
+// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// 
CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1486,91 +1590,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// 
CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr 
[[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr 
[[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// 
CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, 
i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 @@ -1581,8 +1685,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store 
i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1591,147 +1694,172 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 
+// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], 
align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[M_ADDR]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[M_CASTED]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[M]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 
[[TMP38]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP39]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP40]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP41]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP42]], [[TMP43]] +// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, 
ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP45]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK11-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP49]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1740,170 +1868,178 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // 
CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store 
i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: 
[[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: 
omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP21]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = 
mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP23]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = 
getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: 
[[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1924,34 +2060,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], 
align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1963,18 +2099,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1982,56 +2121,71 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: 
cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2041,13 +2195,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2059,74 +2213,80 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 
[[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// 
CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store 
i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2212,13 +2372,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -2227,16 +2387,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, 
!llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2246,22 +2406,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// 
CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2306,38 +2466,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = 
getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -2413,13 +2573,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// 
CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -2428,16 +2588,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // 
CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2447,20 +2607,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 @@ -2505,36 +2665,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// 
CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -275,75 +275,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = 
icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -352,13 +370,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -368,67 +386,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 
[[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -440,75 +464,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -517,13 +559,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -533,67 +575,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 
-// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -605,95 +653,113 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -702,13 +768,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -718,67 +784,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: 
omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -934,73 +1006,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1009,13 +1099,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1025,64 +1115,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1094,73 +1190,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1169,13 +1283,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1185,64 +1299,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP19]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1254,93 +1374,111 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1349,13 +1487,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1365,64 +1503,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// 
CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1949,27 +2093,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1979,84 +2126,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// 
CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 
4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2069,15 +2234,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2090,89 +2254,97 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, 
i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = 
trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], 
align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: 
-// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2190,27 +2362,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2220,84 +2395,102 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 
[[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, 
!llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// 
CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2310,15 +2503,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2331,89 +2523,97 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 
// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = 
load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // 
CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2432,33 +2632,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2468,114 +2670,133 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK9-NEXT: store i32 [[ADD8]], 
ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// 
CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK9-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// 
CHECK9-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK9-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -2588,16 +2809,15 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2610,90 +2830,100 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr 
[[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// 
CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2847,75 +3077,93 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2924,13 +3172,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2940,66 +3188,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3011,75 +3265,93 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3088,13 +3360,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3104,66 +3376,72 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3176,107 +3454,125 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP42]] -// 
CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group 
[[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP29]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP30]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3285,14 +3581,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3302,67 +3598,75 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = 
load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3644,27 +3948,30 @@ // 
CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3674,82 +3981,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr 
[[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 
0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3762,15 +4087,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, 
align 4 @@ -3783,86 +4107,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 
4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 
+// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3880,27 +4212,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3910,82 +4245,100 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br 
label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3998,15 +4351,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = 
alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4019,86 +4371,94 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label 
[[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -4117,33 +4477,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4153,112 +4515,131 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 
[[TMP14]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr 
[[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// 
CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ 
[[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: 
[[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4271,16 +4652,15 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4293,87 +4673,97 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4527,73 +4917,91 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4602,13 +5010,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4618,63 +5026,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4686,73 +5100,91 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4761,13 +5193,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4777,63 +5209,69 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 
+// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4846,105 +5284,123 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4953,14 +5409,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4970,64 +5426,72 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP46]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -398,8 +398,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -408,140 +407,165 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: 
omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 
// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -579,144 +603,152 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = 
alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], 
[[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label 
[[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], 
i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -724,10 +756,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x 
%struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -738,9 +770,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -802,12 +834,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -890,7 +922,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -899,23 +931,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -923,114 +959,134 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP14]] 
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 
[[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1048,16 +1104,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1065,127 +1119,135 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load 
i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void 
@_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] 
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr 
inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1205,7 +1267,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ 
-1219,7 +1281,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1235,9 +1297,9 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1471,8 +1533,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], 
align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1481,138 +1542,163 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: 
[[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load 
ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], 
ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = 
icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] 
-// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1650,140 +1736,148 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef 
nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void 
@_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// 
CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], 
align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1791,10 +1885,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1805,9 +1899,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 
@__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1869,12 +1963,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1957,7 +2051,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1966,23 +2060,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1990,112 +2088,132 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr 
[[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2113,16 +2231,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: 
[[_TMP1:%.*]] = alloca i32, align 4 @@ -2130,123 +2246,131 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 
4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: 
[[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 
0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2266,7 +2390,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2280,7 +2404,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // 
CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2296,9 +2420,9 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2462,35 +2586,33 @@ // CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr 
[[G1_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2498,72 +2620,87 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// 
CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group 
[[ACC_GRP4]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, 
!llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK5-NEXT: br i1 [[TMP31]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2572,15 +2709,16 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2589,79 +2727,91 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 
8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK5-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: 
cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store i32 1, ptr [[G_ADDR]], align 
4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr 
[[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3467,8 +3617,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// 
CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -3477,140 +3626,165 @@ // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// 
CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done3: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done1: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr 
[[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP6]] -// 
CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK13-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK13-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, 
!llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP36:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK13-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // 
CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done8: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done5: // CHECK13-NEXT: ret void // // @@ -3648,144 +3822,152 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[SIVAR_ADDR:%.*]] = 
alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// 
CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// 
CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], 
ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK13-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, 
!llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK13-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK13-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done13: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done10: // CHECK13-NEXT: ret void // // @@ -3807,7 
+3989,7 @@ // CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -3816,23 +3998,27 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3840,114 +4026,134 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca 
[[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done4: -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 
+// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done2: +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// 
CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// 
CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[VEC]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store ptr [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done10: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK13: arraydestroy.done7: // CHECK13-NEXT: ret void // // @@ -3965,16 +4171,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[S_ARR_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK13-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3982,127 +4186,135 @@ // CHECK13-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK13-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK13-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK13-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK13-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK13-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK13-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 
-// CHECK13-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK13: omp.arraycpy.body: -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK13-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK13: omp.arraycpy.done5: -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) #[[ATTR4]] -// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR5]] -// CHECK13-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK13-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK13-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK13: omp.arraycpy.done3: +// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK13-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) #[[ATTR4]] +// CHECK13-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR5]] +// CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // 
CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// 
CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK13-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // 
CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: -// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK13-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK13: arraydestroy.done14: +// CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] 
+// CHECK13: arraydestroy.done11: // CHECK13-NEXT: ret void // // @@ -4184,9 +4396,9 @@ // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4203,8 +4415,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -4213,138 +4424,163 @@ // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 
-// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4:[0-9]+]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr 
[[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store 
ptr [[S_ARR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 
4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK15-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done8: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done5: // CHECK15-NEXT: ret void // // @@ -4382,140 +4618,148 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[VAR_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: 
store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] 
// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] // CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done3: -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR5]] -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label 
[[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done1: +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR5]] +// CHECK15-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP11:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK15-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) 
#[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done11: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done8: // CHECK15-NEXT: ret void // // @@ -4537,7 +4781,7 @@ // CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -4546,23 +4790,27 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], 
align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4570,112 +4818,132 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca 
[[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 
+// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// 
CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP16]] -// 
CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP16]] 
+// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 
0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr 
[[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done10: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done7: // CHECK15-NEXT: ret void // // @@ -4693,16 +4961,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// 
CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4710,123 +4976,131 @@ // CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK15-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK15-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK15-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK15-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK15-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK15-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) #[[ATTR4]] // CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR5]] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK15: omp.arraycpy.done4: -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) #[[ATTR4]] -// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR5]] -// CHECK15-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK15: omp.arraycpy.done2: +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK15-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) #[[ATTR4]] +// CHECK15-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR5]] +// CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK15-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// 
CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP29:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK15-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: -// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK15-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds 
[2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK15: arraydestroy.done12: +// CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK15: arraydestroy.done9: // CHECK15-NEXT: ret void // // @@ -4908,9 +5182,9 @@ // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK15-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -4926,35 +5200,33 @@ // CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// 
CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -4962,72 +5234,87 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, 
!llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5036,15 +5323,16 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -5056,76 +5344,88 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca 
[[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], 
align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: 
cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP25:%.*]] 
= getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -187,81 +187,96 @@ // CHECK1-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], 
ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -270,13 +285,14 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -286,62 +302,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: store i32 0, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -352,75 +376,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: 
[[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -429,12 +468,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -444,43 +484,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP21]] @@ -488,17 +534,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -608,70 +654,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: 
// CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -680,12 +741,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -695,43 +757,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP27]] @@ -739,17 +807,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -760,75 +828,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 
4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -837,12 +920,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -852,43 +936,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP33]] @@ -896,17 +986,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -918,93 +1008,111 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1013,12 +1121,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1028,43 +1137,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -1072,17 +1187,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1181,70 +1296,85 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..13, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: 
// CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1253,12 +1383,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1268,43 +1399,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP45]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP45]] @@ -1312,17 +1449,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1333,75 +1470,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..14) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], 
align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1410,12 +1562,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1425,43 +1578,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP51]] @@ -1469,17 +1628,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1491,93 +1650,111 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 
4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..17, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: call void @.omp_outlined..17(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @.omp_outlined..17(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1586,12 +1763,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1601,43 +1779,49 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -1645,17 +1829,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1749,81 +1933,96 @@ // CHECK3-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], 
ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1832,13 +2031,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1848,62 +2048,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: 
store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1914,75 +2122,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1991,12 +2214,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2006,43 +2230,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to 
i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP22]] @@ -2050,17 +2280,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2170,70 +2400,85 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: 
// CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2242,12 +2487,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2257,43 +2503,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP28]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP28]] @@ -2301,17 +2553,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2322,75 +2574,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 
8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2399,12 +2666,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2414,43 +2682,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], 
align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2458,17 +2732,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2480,147 +2754,172 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// 
CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then4: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP19]], align 8, 
!llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP22]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[TMP17:%.*]] = 
load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK3: omp_if.else5: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK3: omp.inner.for.cond6: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK3: omp.inner.for.body8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label 
[[OMP_IF_ELSE14:%.*]] -// CHECK3: omp_if.then13: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK3-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK3: omp_if.else14: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_IF_END16]] -// CHECK3: omp_if.end16: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK3: omp.inner.for.inc17: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK3: omp.inner.for.end19: -// CHECK3-NEXT: br label [[OMP_IF_END20]] -// CHECK3: omp_if.end20: +// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] 
to i64 +// CHECK3-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK3-NEXT: store i64 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP35]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK3: omp_if.then15: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK3: omp_if.else16: +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP38]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: br label [[OMP_IF_END18]] +// CHECK3: omp_if.end18: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK3: omp.inner.for.inc19: +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK3: omp.inner.for.end21: +// CHECK3-NEXT: br label [[OMP_IF_END22]] +// CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK3-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 
100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2629,13 +2928,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2645,48 +2945,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP38]] @@ -2694,60 +3004,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// 
CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: 
omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP41:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ 
-2756,13 +3066,14 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2772,48 +3083,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP42]] @@ -2821,60 +3142,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP45:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2973,70 +3294,85 @@ // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..14, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3045,12 +3381,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3060,43 +3397,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP49]] @@ -3104,17 
+3447,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3125,75 +3468,90 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..15) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, 
ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3202,12 +3560,13 @@ // // // 
CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3217,43 +3576,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: 
[[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3261,17 +3626,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: 
omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3283,93 +3648,111 @@ // CHECK3-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, 
ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, i64 [[TMP1]]) +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 
4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP54]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: call void @.omp_outlined..18(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @.omp_outlined..18(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3378,12 +3761,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3393,43 +3777,49 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK3-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK3-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -3437,17 +3827,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP57]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4134,81 +4524,96 @@ // CHECK9-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 
-// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4217,13 +4622,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4233,62 +4639,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: store i32 0, ptr [[ARG]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // 
CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4299,75 +4713,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: 
[[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4376,12 +4805,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4391,43 +4821,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], 
ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP21]] @@ -4435,17 +4871,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP21]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4555,70 +4991,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 
4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: 
// CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4627,12 +5078,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4642,43 +5094,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP27]] @@ -4686,17 +5144,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4707,75 +5165,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 
4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4784,12 +5257,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4799,43 +5273,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP33]] @@ -4843,17 +5323,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4865,93 +5345,111 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4960,12 +5458,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4975,43 +5474,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -5019,17 +5524,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5128,70 +5633,85 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..13, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: 
// CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5200,12 +5720,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5215,43 +5736,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 
+// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP45]] @@ -5259,17 +5786,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5280,75 +5807,90 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..14) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], 
align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5357,12 +5899,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// 
CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5372,43 +5915,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 
[[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP51]] @@ -5416,17 +5965,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5438,93 +5987,111 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, i64 [[TMP1]]) +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 
4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..17, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: call void @.omp_outlined..17(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @.omp_outlined..17(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5533,12 +6100,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5548,43 +6116,49 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -5592,17 +6166,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5696,81 +6270,96 @@ // CHECK11-SAME: (i64 noundef [[ARG:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARG_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., i64 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[ARG]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], 
align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[ARG_CASTED]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[ARG_CASTED]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5779,13 +6368,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], 
i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARG:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[ARG:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5795,62 +6385,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[ARG]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: store i32 0, ptr [[ARG_ADDR]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARG]], align 4, !nontemporal !10, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// 
CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5861,75 +6459,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l53 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: 
[[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext 
i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group 
[[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5938,12 +6551,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5953,43 +6567,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// 
CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP22]] @@ -5997,17 +6617,17 @@ 
// CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6117,70 +6737,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l78 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: 
[[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6189,12 +6824,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6204,43 +6840,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = 
load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP28]] @@ -6248,17 +6890,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6269,75 +6911,90 @@ 
// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l85 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr 
[[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6346,12 +7003,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6361,43 +7019,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6405,17 +7069,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6427,147 +7091,172 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // 
CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 
[[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then4: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr 
[[TMP19]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP21]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP22]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP34]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK11: omp_if.else5: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK11: omp.inner.for.cond6: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK11: omp.inner.for.body8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// 
CHECK11-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK11: omp_if.then13: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK11-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK11: omp_if.else14: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: br label [[OMP_IF_END16]] -// CHECK11: omp_if.end16: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK11: omp.inner.for.inc17: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP37:![0-9]+]] -// CHECK11: omp.inner.for.end19: -// CHECK11-NEXT: br label [[OMP_IF_END20]] -// CHECK11: omp_if.end20: +// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = zext i32 [[TMP29]] to i64 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP36:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP36]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP35]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP37]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK11: omp_if.then15: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK11: omp_if.else16: +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP38]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: br label [[OMP_IF_END18]] +// CHECK11: omp_if.end18: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK11: omp.inner.for.inc19: +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11: omp.inner.for.end21: +// CHECK11-NEXT: br label [[OMP_IF_END22]] +// CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK11-NEXT: br i1 [[TMP42]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: 
// CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6576,13 +7265,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6592,48 +7282,58 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: 
br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP38]] @@ -6641,60 +7341,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: 
call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP41:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 
[[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6703,13 +7403,14 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6719,48 +7420,58 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// 
CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, 
ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 
-// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP42]] @@ -6768,60 +7479,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// 
CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], 
label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: 
[[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP45:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6920,70 +7631,85 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..14, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK11-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6992,12 +7718,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7007,43 +7734,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP49]] @@ -7051,17 +7784,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7072,75 +7805,90 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l66 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..15) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 
99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 
[[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP16]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7149,12 +7897,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7164,43 +7913,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 
+// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7208,17 +7963,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7230,93 +7985,111 @@ // CHECK11-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_14:%.*]], align 1 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, i64 [[TMP1]]) +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[TMP0]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..17, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..17 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca 
i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, 
!llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: call void @.omp_outlined..18(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @.omp_outlined..18(ptr [[TMP19]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP54]] // 
CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7325,12 +8098,13 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7340,43 +8114,49 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK11-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK11-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: call void 
@_Z3fn3v(), !llvm.access.group [[ACC_GRP57]] @@ -7384,17 +8164,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -195,41 +195,39 @@ // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load volatile double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: store double [[TMP1]], ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP7]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load volatile double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -237,117 +235,135 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[SFVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store double [[TMP8]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP13]], ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[SFVAR6]], align 4, 
!llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float [[TMP17]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double [[TMP19]], ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group 
[[ACC_GRP4]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = load volatile double, ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP27]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: br i1 [[TMP39]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 -// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP31]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP40]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load double, ptr [[TMP41]], align 8 +// CHECK1-NEXT: store volatile double [[TMP42]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP43]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP44]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]], i64 
noundef [[G:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -361,101 +377,115 @@ // CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store double [[TMP6]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store double [[TMP12]], ptr [[G]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, 
!llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: 
// CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G3]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR6]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR7]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -494,8 +524,7 @@ // 
CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 @@ -503,26 +532,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -530,109 +562,127 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store float [[TMP6]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float [[TMP15]], ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP10]], i32 [[TMP11]], ptr [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], ptr [[G2]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 
[[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 4 -// CHECK3-NEXT: store volatile double [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP27]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP36]], ptr [[TMP8]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load double, ptr [[TMP37]], align 4 +// CHECK3-NEXT: store volatile double [[TMP38]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[SVAR3]], 
align 4 +// CHECK3-NEXT: store i32 [[TMP39]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP40]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -640,107 +690,117 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store float [[TMP10]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr 
[[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 
-// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), 
!llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP1]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -898,8 +958,7 @@ // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 
[[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -909,27 +968,31 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 
+// CHECK5-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -937,31 +1000,38 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = 
alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -971,118 +1041,130 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: 
omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SVAR7]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: store i32 [[TMP18]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]], i64 [[TMP19]]), !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP32:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store ptr [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK5-NEXT: br i1 [[TMP42]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[ARRAY_BEGIN9]], [[TMP29]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP43]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP44:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP44]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP29]] -// 
CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done10: -// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP30]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK5-NEXT: store i32 [[TMP31]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP44]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done7: +// CHECK5-NEXT: [[TMP45:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP45]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK5-NEXT: store i32 [[TMP46]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP47]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done12: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done9: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1091,36 +1173,46 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store 
i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: @@ -1130,106 +1222,106 @@ // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// 
CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: 
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr 
[[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done14: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done11: +// CHECK5-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK5-NEXT: store i32 [[TMP40]], ptr [[SVAR]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// 
CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done16: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done13: // CHECK5-NEXT: ret void // // @@ -1247,10 +1339,10 @@ // CHECK5-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK5-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK5-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1261,9 +1353,9 @@ // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK5-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], 
ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK5-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK5-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK5-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK5-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1325,12 +1417,12 @@ // CHECK5-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK5: omp_offload.cont: // CHECK5-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: // CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1405,7 +1497,7 @@ // CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1414,23 +1506,27 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1438,143 +1534,161 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store 
ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK5-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC3]], i64 [[TMP16]], ptr [[S_ARR4]], ptr [[TMP17]]), !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK5-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK5-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i64 [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK5-NEXT: br i1 [[TMP23]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK5-NEXT: store i32 [[TMP39]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN5]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP40]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done9: -// CHECK5-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], 
i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP41]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP42]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done11: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done8: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1583,141 +1697,149 @@ // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK5-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK5-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK5-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK5-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK5-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK5-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK5-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // 
CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK5: arrayctor.loop: // CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK5-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK5-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK5-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: 
[[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK5-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK5-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: 
[[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK5-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr 
[[ARRAY_BEGIN12]], i64 2 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR3]], align 4 +// CHECK5-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[TMP10]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// 
CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done13: -// CHECK5-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done10: +// CHECK5-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK5-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK5-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK5: arraydestroy.done15: +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK5: arraydestroy.done12: // CHECK5-NEXT: ret void // // @@ -1737,7 +1859,7 @@ // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK5-NEXT: store i32 0, ptr [[F]], align 4 // CHECK5-NEXT: ret void // @@ -1750,7 +1872,7 @@ // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK5-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK5-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 
// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK5-NEXT: ret void @@ -1917,8 +2039,7 @@ // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1928,27 +2049,31 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1956,31 +2081,38 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -1990,116 +2122,128 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP6]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// 
CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[SVAR7]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: store i32 [[TMP16]], ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]], i32 [[TMP17]]), !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], 
ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// 
CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK7-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP26]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP41]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP42:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP42]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: 
omp.arraycpy.done10: -// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP28]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP29]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP42]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done7: +// CHECK7-NEXT: [[TMP43:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP43]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP44:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP44]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP45]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // 
CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done12: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done9: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2108,34 +2252,44 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[SVAR]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// 
CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: @@ -2145,104 +2299,104 @@ // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 
[[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4, 
!llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne 
i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 
[[TMP37]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done12: -// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP28]], ptr [[SVAR_ADDR]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done9: +// CHECK7-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK7-NEXT: store i32 [[TMP40]], ptr [[SVAR]], align 4 // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 
// CHECK7-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done14: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done11: // CHECK7-NEXT: ret void // // @@ -2260,10 +2414,10 @@ // CHECK7-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK7-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK7-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2274,9 +2428,9 @@ // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK7-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK7-NEXT: 
[[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK7-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK7-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK7-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2338,12 +2492,12 @@ // CHECK7-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK7: omp_offload.cont: // CHECK7-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: // CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2418,7 +2572,7 @@ // CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[TMP:%.*]] = 
alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2427,23 +2581,27 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2451,141 +2609,159 @@ // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: 
[[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store 
ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], 
i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC3]], i32 [[TMP14]], ptr [[S_ARR4]], ptr [[TMP15]]), !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[S_ARR]], ptr 
[[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK7-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK7-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP24]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP37]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 
x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN5]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done9: -// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// 
CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP39]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP40]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done11: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done8: // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // 
CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2594,137 +2770,145 @@ // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK7-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK7-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK7-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK7-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK7-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK7-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK7-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK7-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK7: arrayctor.loop: // CHECK7-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK7-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK7-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK7-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK7: arrayctor.cont: -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK7-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK7-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK7-NEXT: store ptr 
[[VAR]], ptr [[_TMP3]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK7-NEXT: [[TMP22:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// 
CHECK7-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP30:%.*]] = load 
i32, ptr [[TMP29]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK7-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK7-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK7: .omp.lastprivate.then: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP25]], ptr [[T_VAR_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK7-NEXT: store i32 [[TMP35]], ptr [[T_VAR]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[TMP10]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done11: -// CHECK7-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK7-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done8: +// CHECK7-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK7-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK7: .omp.lastprivate.done: -// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK7-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK7-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK7: arraydestroy.body: -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK7-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK7-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK7: arraydestroy.done13: +// CHECK7-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK7-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK7: arraydestroy.done10: // CHECK7-NEXT: ret void // // @@ -2744,7 +2928,7 @@ // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: store i32 0, ptr [[F]], align 4 // CHECK7-NEXT: ret void // @@ -2757,7 +2941,7 @@ // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK7-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -332,29 +332,36 @@ // CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -370,62 +377,70 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label 
[[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: 
store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -435,12 +450,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -455,14 +471,20 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -478,75 +500,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// 
CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -559,19 +581,19 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = 
getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -602,12 +624,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -643,15 +665,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -659,88 +683,101 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // 
CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -749,12 +786,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -764,104 +802,110 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: 
[[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], 
[ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// 
CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: 
arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -885,7 +929,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -899,7 +943,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1084,29 +1128,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1122,60 +1173,68 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: 
store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1185,12 +1244,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1205,14 +1265,20 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1226,73 +1292,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: 
[[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x 
i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1305,19 +1371,19 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 
noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1348,12 +1414,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1389,15 +1455,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK3-SAME: () 
#[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1405,86 +1473,99 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1493,12 +1574,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1508,100 +1590,106 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ 
[[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP18]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] 
// CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1625,7 +1713,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1639,7 +1727,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, 
ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1798,15 +1886,17 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK5-SAME: () #[[ATTR5:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1814,61 +1904,74 @@ // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label 
[[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK5-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1877,12 +1980,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1895,77 +1999,83 @@ // CHECK5-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK5-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK5-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, 
!llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2876,29 +2986,36 @@ // CHECK13-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK13-SAME: () #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK13-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // 
CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2914,62 +3031,70 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK13-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -2989,12 +3114,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3009,14 +3135,20 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3032,75 +3164,75 @@ // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK13-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -3122,15 +3254,17 @@ // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK13-SAME: () #[[ATTR0]] { // CHECK13-NEXT: entry: -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3138,88 +3272,101 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK13-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[TMP18]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -3238,12 +3385,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3253,104 +3401,110 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK13-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK13-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK13-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], 
ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK13-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr 
[[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK13-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK13: arrayctor.loop: // CHECK13-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK13-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK13-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK13-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK13: arrayctor.cont: // CHECK13-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK13-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // 
CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK13-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK13-NEXT: 
call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK13-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 2, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK13: .omp.final.done: // 
CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK13-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK13-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK13: arraydestroy.body: -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK13-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK13-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK13-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK13-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3396,7 +3550,7 @@ // CHECK13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK13-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // 
CHECK13-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK13-NEXT: ret void @@ -3414,29 +3568,36 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124 // CHECK15-SAME: () #[[ATTR0:[0-9]+]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK15-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // 
CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3452,60 +3613,68 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label 
[[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: 
.omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5:[0-9]+]] // CHECK15-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -3525,12 +3694,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3545,14 +3715,20 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// 
CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -3566,73 +3742,73 @@ // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // 
CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// 
CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -3654,15 +3830,17 @@ // CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80 // CHECK15-SAME: () #[[ATTR0]] { // CHECK15-NEXT: entry: -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -3670,86 +3848,99 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: store i32 
[[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 
+// CHECK15-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3768,12 +3959,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef 
[[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -3783,100 +3975,106 @@ // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK15-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK15-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK15: arrayctor.loop: // CHECK15-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // 
CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) #[[ATTR4]] -// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK15-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK15-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK15-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK15: arrayctor.cont: // CHECK15-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK15-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // 
CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] -// 
CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK15-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK15-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 2, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK15: .omp.final.done: // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] -// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK15-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_2]], 
ptr [[ARRAY_BEGIN6]], i32 2 // CHECK15-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK15: arraydestroy.body: -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK15-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK15-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] // CHECK15-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK15-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -3922,7 +4120,7 @@ // CHECK15-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK15-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK15-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK15-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK15-NEXT: ret void @@ -3940,15 +4138,17 @@ // CHECK17-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104 // CHECK17-SAME: () #[[ATTR0:[0-9]+]] { // CHECK17-NEXT: entry: -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -3956,61 +4156,74 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: 
store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4019,12 +4232,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 
noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -4040,74 +4254,80 @@ // CHECK17-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 
// CHECK17-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK17-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // 
CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // 
CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], ptr [[REF_TMP]], i32 0, i32 2 +// 
CHECK17-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 2, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -114,71 +114,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -187,12 +202,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -202,60 +218,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -266,71 +288,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l38 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -339,12 +376,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -354,60 +392,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -451,71 +495,86 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l30 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -524,12 +583,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -539,60 +599,66 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD]], 
ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -628,22 +694,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 @@ -653,22 +719,22 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV5]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK3: omp.body.continue12: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK3: omp.inner.for.inc13: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end15: // CHECK3-NEXT: store i32 1000, ptr [[I6]], align 4 @@ -690,22 +756,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -101,34 +101,34 
@@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -141,205 +141,229 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // 
CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 
[[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], 
i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -353,15 +377,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -373,15 +397,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -396,36 +420,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 
-// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr 
null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store 
ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -437,205 +461,229 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
-// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 
[[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 
[[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 
[[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 
1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 
[[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -649,15 +697,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -669,15 +717,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -699,34 +747,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l66(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -739,201 +787,225 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// 
CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, 
!llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -947,15 +1019,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -967,15 +1039,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -990,36 +1062,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], 
align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// 
CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1031,201 +1103,225 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 
-// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 
[[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] // 
CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr 
@.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: 
.omp.reduction.default: // CHECK3-NEXT: ret void @@ -1239,15 +1335,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1259,15 +1355,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1292,209 +1388,233 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !4 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// 
CHECK5-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP22]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK5-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group !8 -// 
CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// 
CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr 
@[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK5-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -1508,15 +1628,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1528,15 +1648,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK5-NEXT: ret void // // @@ -1565,26 +1685,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -1611,37 +1731,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP6]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1664,26 +1784,26 @@ // CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK9-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 
[[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 @@ -1710,37 +1830,37 @@ // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], 
[[TMP2]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK9-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -413,75 +413,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -490,13 +508,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -506,67 +524,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 
[[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -578,75 +602,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -655,13 +697,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -671,67 +713,73 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 
-// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -743,75 +791,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -820,13 +886,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -836,88 +902,94 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: 
omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// 
CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -929,75 +1001,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1006,13 +1096,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1022,55 +1112,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// 
CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1078,9 +1174,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1092,75 +1188,93 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1169,13 +1283,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1185,55 +1299,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// 
CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1241,9 +1361,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1477,73 +1597,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1552,13 +1690,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1568,64 +1706,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1637,73 +1781,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1712,13 +1874,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1728,64 +1890,70 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP21]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1797,73 +1965,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1872,13 +2058,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1888,83 +2074,89 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1976,73 +2168,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2051,13 +2261,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2067,52 +2277,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // 
CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2120,9 +2336,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 
4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2134,73 +2350,91 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2209,13 +2443,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2225,52 +2459,58 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // 
CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2278,9 +2518,9 @@ // CHECK3: omp.dispatch.inc: // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 
4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2514,75 +2754,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2591,13 +2849,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2607,67 +2865,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], 
ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 
[[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2679,76 +2943,94 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK5: .omp.final.then: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: @@ -2756,13 +3038,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2772,67 +3054,73 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// 
CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: 
[[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2844,75 +3132,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2921,13 +3227,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2937,88 +3243,94 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: 
omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// 
CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: 
br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3030,75 +3342,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3107,13 +3437,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3123,55 +3453,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 
1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3179,9 +3515,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3193,75 +3529,93 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3270,13 +3624,13 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3286,55 +3640,61 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 
1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3342,9 +3702,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3578,73 +3938,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3653,13 +4031,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3669,64 +4047,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // 
CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3738,73 +4122,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3813,13 +4215,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3829,64 +4231,70 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP21]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3898,73 +4306,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3973,13 +4399,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3989,83 +4415,89 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK7-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4077,73 +4509,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4152,13 +4602,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4168,52 +4618,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, 
i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] 
+// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4221,9 +4677,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4235,73 +4691,91 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4310,13 +4784,13 @@ // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4326,52 +4800,58 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, 
i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] 
+// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4379,9 +4859,9 @@ // CHECK7: omp.dispatch.inc: // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK7-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK7-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5203,27 +5683,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5233,84 +5716,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr 
[[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: 
br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr 
[[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5323,15 +5824,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5344,89 +5844,97 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], 
align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = 
trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5444,27 +5952,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5474,84 +5985,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = 
alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5564,15 +6093,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5585,89 +6113,97 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], 
align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5686,33 +6222,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5722,114 +6260,133 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 
[[TMP14]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP34:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] 
// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // 
CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK13-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK13-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK13-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -5842,16 +6399,15 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, 
align 4 @@ -5864,90 +6420,100 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -5965,27 +6531,30 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5995,84 +6564,102 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] 
= alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6085,15 +6672,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6106,72 +6692,80 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], 
align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6179,12 +6773,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 
[[TMP32]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -6203,33 +6797,35 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6239,89 +6835,108 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] 
+// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// 
CHECK13-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK13-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6334,16 +6949,15 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca 
i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6356,74 +6970,84 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK13-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 
[[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6431,12 +7055,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -6679,75 +7303,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6756,13 +7398,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6772,66 +7414,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6843,75 +7491,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6920,13 +7586,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6936,66 +7602,72 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7008,86 +7680,104 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], 
[[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7096,14 +7786,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7113,89 +7803,97 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7207,75 +7905,93 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK13-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7284,13 +8000,13 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7300,54 +8016,60 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7355,9 +8077,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 
[[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7370,86 +8092,104 @@ // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK13-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: 
cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP70]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK13-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7458,14 +8198,14 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7475,56 +8215,64 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK13-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 
// CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 
[[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7532,9 +8280,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7975,27 +8723,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // 
CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8005,82 +8756,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr 
[[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 
0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8093,15 +8862,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, 
align 4 @@ -8114,86 +8882,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 
4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 
+// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8211,27 +8987,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8241,82 +9020,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br 
label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, 
!llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8329,15 +9126,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = 
alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8350,86 +9146,94 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label 
[[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK15-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8448,33 +9252,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8484,112 +9290,131 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 
[[TMP14]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr 
[[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// 
CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ 
[[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK15-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK15-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK15-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK15-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK15-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK15-NEXT: 
[[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK15-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK15-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -8602,16 +9427,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8624,87 +9448,97 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK15-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8722,27 +9556,30 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8752,82 +9589,100 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] 
= alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK15-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: 
.omp.final.then: -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8840,15 +9695,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8861,69 +9715,77 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 35, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 
4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: 
omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8931,12 +9793,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK15-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK15-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK15-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -8955,33 +9817,35 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8991,87 +9855,106 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] 
+// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr 
[[TMP6]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK15-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP36:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK15-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9084,16 +9967,15 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, 
align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9106,71 +9988,81 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // 
CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9178,12 +10070,12 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK15-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK15-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK15-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -9426,73 +10318,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9501,13 +10411,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9517,63 +10427,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: 
store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], 
i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9585,73 +10501,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9660,13 +10594,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9676,63 +10610,69 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 
4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK15-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9745,84 +10685,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: 
cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] 
-// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group 
[[ACC_GRP59]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9831,14 +10789,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9848,84 +10806,92 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK15-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK15-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9937,73 +10903,91 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK15-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK15-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10012,13 +10996,13 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10028,51 +11012,57 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -10080,9 +11070,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = 
icmp ne i32 [[TMP20]], 0 +// CHECK15-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10095,84 +11085,102 @@ // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: 
cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] 
-// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group 
[[ACC_GRP71]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK15-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10181,14 +11189,14 @@ // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10198,53 +11206,61 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP74:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP74]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -10252,9 +11268,9 @@ // CHECK15: omp.dispatch.inc: // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK15-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK15-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK15: .omp.final.then: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 // CHECK15-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10691,27 +11707,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10721,84 +11740,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca 
i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], 
align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] 
= load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10811,15 +11848,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10832,89 +11868,97 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], 
align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -10932,27 +11976,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10962,84 +12009,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] 
= alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11052,15 +12117,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11073,89 +12137,97 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], 
align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -11174,33 +12246,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11210,114 +12284,133 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 
[[TMP14]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP23]]), !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i64 [[TMP28]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP34:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP36]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] 
// CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // 
CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE11]] ], [ [[TMP33]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE11]] ], [ [[TMP46]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK17-NEXT: store i32 [[TMP34]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK17-NEXT: store i32 [[TMP47]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) -// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP48:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP48]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP49]]) +// CHECK17-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 +// CHECK17-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP39]], 0 +// CHECK17-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP52]], 0 // CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -11330,16 +12423,15 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca 
i32, align 4 @@ -11352,90 +12444,100 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// 
CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK17-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -11453,27 +12555,30 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11483,84 +12588,102 @@ // CHECK17-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11573,15 +12696,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, 
align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11594,72 +12716,80 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], 
align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 
[[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11667,12 +12797,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: 
[[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -11691,33 +12821,35 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11727,89 +12859,108 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] 
+// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i64 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i64 [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: store i32 [[TMP35]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// 
CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK17-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11822,16 +12973,15 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = 
alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11844,74 +12994,84 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 
0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br 
i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP28:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP43]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11919,12 +13079,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -12167,75 +13327,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12244,13 +13422,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12260,66 +13438,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12331,75 +13515,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12408,13 +13610,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12424,66 +13626,72 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12496,86 +13704,104 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], 
[[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12584,14 +13810,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12601,89 +13827,97 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12695,75 +13929,93 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12772,13 +14024,13 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12788,54 +14040,60 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12843,9 +14101,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 
-// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12858,86 +14116,104 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP2]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP70]] -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i64 [[TMP17]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i64 [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12946,14 +14222,14 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12963,56 +14239,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr 
[[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], 
align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// 
CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // 
CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -13020,9 +14304,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13463,27 +14747,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr 
[[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13493,82 +14780,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr 
[[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 
0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13581,15 +14886,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, 
align 4 @@ -13602,86 +14906,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 
4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 
+// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13699,27 +15011,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13729,82 +15044,100 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr 
[[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br 
label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, 
!llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, 
!llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13817,15 +15150,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] 
= alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13838,86 +15170,94 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, 
ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label 
[[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr 
[[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13936,33 +15276,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13972,112 +15314,131 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 
[[TMP14]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP21]]), !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr 
[[TMP6]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP34]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// 
CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ 
[[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP32]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP45]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP46:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[TMP46]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP47]]) +// CHECK19-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK19-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK19-NEXT: 
[[TMP50:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -14090,16 +15451,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14112,87 +15472,97 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// 
CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP18]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -14210,27 +15580,30 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14240,82 +15613,100 @@ // CHECK19-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], 
ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: 
.omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14328,15 +15719,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14349,69 +15739,77 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 1073741859, i32 [[TMP8]], i32 [[TMP9]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP13]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14419,12 +15817,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -14443,33 +15841,35 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, 
align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14479,87 +15879,106 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] 
+// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[N]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr 
[[TMP6]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP36:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP36]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP37]]) +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK19-NEXT: br i1 [[TMP39]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP40]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -14572,16 +15991,15 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca 
i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14594,71 +16012,81 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // 
CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14666,12 +16094,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14914,73 +16342,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14989,13 +16435,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15005,63 +16451,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15073,73 +16525,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15148,13 +16618,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15164,63 +16634,69 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15233,84 +16709,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] 
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group 
[[ACC_GRP59]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15319,14 +16813,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15336,84 +16830,92 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15425,73 +16927,91 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15500,13 +17020,13 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15516,51 +17036,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], 
align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call 
void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15568,9 +17094,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15583,84 +17109,102 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] 
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !llvm.access.group 
[[ACC_GRP71]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15669,14 +17213,14 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15686,53 +17230,61 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15740,9 +17292,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -271,15 +271,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -294,6 +296,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -309,68 +313,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -467,15 +471,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -491,6 +497,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -508,65 +516,65 @@ // CHECK1: arrayctor.cont: 
// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to 
i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -790,15 +798,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -813,6 +823,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -828,66 +840,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -984,15 +996,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1008,6 +1022,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1025,63 +1041,63 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) 
// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1279,15 +1295,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1303,63 +1321,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -89,34 +89,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], 
align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call 
i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -129,19 +129,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -152,74 +155,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: 
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -233,15 +238,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -256,36 +261,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 
-// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -297,19 +302,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -320,74 +328,76 @@ // CHECK1-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr 
@.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -401,15 +411,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -431,34 +441,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -471,19 +481,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -494,74 +507,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: 
-// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -575,15 +590,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -598,36 +613,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 
-// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -639,19 +654,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -662,74 +680,76 @@ // CHECK3-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr 
@.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -743,15 +763,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -776,19 +796,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -800,77 +823,79 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label 
[[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -884,15 +909,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -313,7 +313,7 @@ // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -380,13 +380,13 @@ // CHECK1-NEXT: store ptr null, ptr [[TMP23]], 
align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK1-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -616,7 +616,7 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], 
align 8 @@ -624,19 +624,20 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -646,53 +647,57 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK1-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = 
icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -775,8 +780,8 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !26 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 @@ -818,21 +823,22 @@ // CHECK1-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -842,58 +848,62 @@ // CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // 
CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[A1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 10, ptr [[A]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void @@ -903,21 +913,22 @@ // CHECK1-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -927,58 +938,62 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: 
store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -991,27 +1006,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1021,62 +1036,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp 
ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1096,7 +1117,7 @@ // CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1114,27 +1135,36 @@ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1144,107 +1174,111 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK1-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK1-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK1-NEXT: [[TMP29:%.*]] = load float, ptr 
[[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK1-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK1-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK1-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: 
[[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK1-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK1-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK1-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group 
[[ACC_GRP35]] +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK1-NEXT: store i64 [[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne 
i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1610,7 +1644,7 @@ // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1620,23 +1654,28 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1646,75 +1685,79 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK1-NEXT: [[A:%.*]] = getelementptr 
inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: store double [[ADD2]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK1-NEXT: store double [[INC]], ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br 
i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1730,42 +1773,41 @@ // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = 
load i8, ptr [[AAA_ADDR]], align 1 -// CHECK1-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1779,111 +1821,121 @@ // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK1-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP41]] // 
CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] 
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK1-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK1-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK1-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK1-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK1-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: @@ -1898,30 +1950,31 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -1931,68 +1984,74 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 10, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2027,7 +2086,7 @@ // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = 
alloca [3 x ptr], align 4 -// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -2092,13 +2151,13 @@ // CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK3-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -2330,7 +2389,7 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, 
align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2338,19 +2397,20 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2360,53 +2420,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK3-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = 
icmp ne i32 [[TMP12]], 0 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2489,8 +2553,8 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 @@ -2532,21 +2596,22 @@ // CHECK3-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2556,58 +2621,62 @@ // CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[A1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 10, ptr [[A]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: ret void @@ -2617,21 +2686,22 @@ // CHECK3-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2641,58 +2711,62 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: 
store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK3-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2705,27 +2779,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2735,62 +2809,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp 
ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2810,7 +2890,7 @@ // CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2828,27 +2908,36 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2858,107 +2947,111 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK3-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK3-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK3-NEXT: [[TMP29:%.*]] = load float, ptr 
[[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK3-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK3-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: 
[[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK3-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK3-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK3-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group 
[[ACC_GRP36]] +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK3-NEXT: store i64 [[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK3-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK3-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK3-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ne 
i32 [[TMP36]], 0 +// CHECK3-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3324,7 +3417,7 @@ // CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3334,23 +3427,28 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3360,75 +3458,79 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-NEXT: [[A:%.*]] = getelementptr 
inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: store double [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK3-NEXT: store double [[INC]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br 
i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3444,42 +3546,41 @@ // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = 
load i8, ptr [[AAA_ADDR]], align 1 -// CHECK3-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3493,111 +3594,121 @@ // CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK3-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP42]] // 
CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] 
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK3-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK3-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK3-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK3-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK3-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK3-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: @@ -3612,30 +3723,31 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -3645,68 +3757,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 10, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3741,7 +3859,7 @@ // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8 // CHECK5-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = 
alloca [3 x ptr], align 8 -// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK5-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_CASTED4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 8 @@ -3808,13 +3926,13 @@ // CHECK5-NEXT: store ptr null, ptr [[TMP23]], align 8 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK5-NEXT: [[TMP27:%.*]] = load i16, ptr [[AA]], align 2 // CHECK5-NEXT: store i16 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i64 120, i64 12, ptr @.omp_task_entry., i64 -1) @@ -4044,7 +4162,7 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, 
align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4052,19 +4170,20 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK5-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4074,53 +4193,57 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK5-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = 
icmp ne i32 [[TMP12]], 0 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4203,8 +4326,8 @@ // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK5-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !26 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 @@ -4246,21 +4369,22 @@ // CHECK5-SAME: (i64 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4270,58 +4394,62 @@ // CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // 
CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !27 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !27 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 10, ptr [[A]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void @@ -4331,21 +4459,22 @@ // CHECK5-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4355,58 +4484,62 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK5-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK5-NEXT: 
store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4419,27 +4552,27 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4449,62 +4582,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp 
ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4524,7 +4663,7 @@ // CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4542,27 +4681,36 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK5-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK5-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4572,107 +4720,111 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK5-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK5-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK5-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 3 +// CHECK5-NEXT: [[TMP29:%.*]] = load float, ptr 
[[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK5-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK5-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK5-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK5-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: 
[[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK5-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK5-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK5-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK5-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK5-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group 
[[ACC_GRP36]] +// CHECK5-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK5-NEXT: store i64 [[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK5-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK5-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK5-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ne 
i32 [[TMP36]], 0 +// CHECK5-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5056,8 +5208,7 @@ // CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5068,29 +5219,34 @@ // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// 
CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..12, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], i64 [[TMP7]]) +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5100,118 +5256,126 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: store 
ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK5-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK5-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK5-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store 
double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK5-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD3]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK5-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP39]] // 
CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK5: omp.inner.for.cond9: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK5: omp.inner.for.body11: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK5-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK5-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK5-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD15]], ptr [[A16]], align 8 -// CHECK5-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// 
CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 8 -// CHECK5-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC18]], ptr [[A17]], align 8 -// CHECK5-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK5-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP22]] -// CHECK5-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i64 1 -// CHECK5-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK5: omp.body.continue22: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK5: omp.inner.for.inc23: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK5: omp.inner.for.end25: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK5: omp.inner.for.cond8: +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK5: omp.inner.for.body10: +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK5-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK5-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK5-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr 
[[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double [[ADD14]], ptr [[A15]], align 8 +// CHECK5-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 8 +// CHECK5-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK5-NEXT: store double [[INC17]], ptr [[A16]], align 8 +// CHECK5-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK5-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK5-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP31]] +// CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i64 1 +// CHECK5-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK5: omp.body.continue21: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK5: omp.inner.for.inc22: +// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK5: omp.inner.for.end24: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK5-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
// CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5227,42 +5391,41 @@ // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK5-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK5-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5276,111 +5439,121 @@ // CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 
-// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK5-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK5-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK5-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK5-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK5-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK5-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK5: omp.precond.then: // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK5-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK5-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK5-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK5-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // 
CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK5-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] 
+// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK5-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK5-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK5-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK5-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK5-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK5-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK5-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: @@ -5395,30 +5568,31 @@ // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -5428,68 +5602,74 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i64 0, i64 2 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 10, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5524,7 +5704,7 @@ // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4 // CHECK7-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = 
alloca [3 x ptr], align 4 -// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 +// CHECK7-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_CASTED4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOFFLOAD_BASEPTRS5:%.*]] = alloca [1 x ptr], align 4 @@ -5589,13 +5769,13 @@ // CHECK7-NEXT: store ptr null, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 0 // CHECK7-NEXT: [[TMP25:%.*]] = load i16, ptr [[AA]], align 2 // CHECK7-NEXT: store i16 [[TMP25]], ptr [[TMP24]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 1 // CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED]], i32 0, i32 2 // CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 // CHECK7-NEXT: [[TMP30:%.*]] = call ptr @__kmpc_omp_target_task_alloc(ptr @[[GLOB2]], i32 [[TMP0]], i32 1, i32 72, i32 12, ptr @.omp_task_entry., i64 -1) @@ -5827,7 +6007,7 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, 
align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -5835,19 +6015,20 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK7-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5857,53 +6038,57 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: 
[[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK7-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = 
icmp ne i32 [[TMP12]], 0 +// CHECK7-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5986,8 +6171,8 @@ // CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 -// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP9]], i32 0, i32 2 // CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK7-NEXT: store i32 1, ptr [[KERNEL_ARGS_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 @@ -6029,21 +6214,22 @@ // CHECK7-SAME: (i32 noundef [[A:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6053,58 +6239,62 @@ // CHECK7-NEXT: [[A1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 // 
CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !28 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal !28 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal !28 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: store i32 10, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 10, ptr [[A]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void @@ -6114,21 +6304,22 @@ // CHECK7-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR2]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 2 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, i32 [[TMP1]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6138,58 +6329,62 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK7-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK7-NEXT: 
store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK7-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6202,27 +6397,27 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..6, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6232,62 +6427,68 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, 
!llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = icmp 
ne i32 [[TMP11]], 0 -// CHECK7-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK7-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6307,7 +6508,7 @@ // CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -6325,27 +6526,36 @@ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..9, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK7-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK7-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6355,107 +6565,111 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 7 +// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 8 +// CHECK7-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK7-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK7-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 3 +// CHECK7-NEXT: [[TMP29:%.*]] = load float, ptr 
[[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK7-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK7-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK7-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK7-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: 
[[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK7-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK7-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK7-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK7-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK7-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK7-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group 
[[ACC_GRP37]] +// CHECK7-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK7-NEXT: store i64 [[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK7-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK7-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK7-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK7-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK7-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP37:%.*]] = icmp ne 
i32 [[TMP36]], 0 +// CHECK7-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6839,8 +7053,7 @@ // CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -6851,29 +7064,34 @@ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// 
CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..12, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], i32 [[TMP7]]) +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6883,118 +7101,126 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: store 
ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 5 +// CHECK7-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK7-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK7-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK7-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store 
double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK7-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK7-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP40]] // 
CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK7-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK7: omp.inner.for.cond9: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK7-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK7: omp.inner.for.body11: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK7-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK7-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD15]], ptr [[A16]], align 4 -// CHECK7-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// 
CHECK7-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 4 -// CHECK7-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC18]], ptr [[A17]], align 4 -// CHECK7-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK7-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK7-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP22]] -// CHECK7-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i32 1 -// CHECK7-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK7: omp.body.continue22: -// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK7: omp.inner.for.inc23: -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP43:![0-9]+]] -// CHECK7: omp.inner.for.end25: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK7: omp.inner.for.cond8: +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK7-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK7: omp.inner.for.body10: +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK7-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// CHECK7-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK7-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK7-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr 
[[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: store double [[ADD14]], ptr [[A15]], align 4 +// CHECK7-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 4 +// CHECK7-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK7-NEXT: store double [[INC17]], ptr [[A16]], align 4 +// CHECK7-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK7-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK7-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP31]] +// CHECK7-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i32 1 +// CHECK7-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK7-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK7: omp.body.continue21: +// CHECK7-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK7: omp.inner.for.inc22: +// CHECK7-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK7-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK7: omp.inner.for.end24: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK7-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK7-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] 
// CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7010,42 +7236,41 @@ // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK7-NEXT: store i8 [[TMP7]], ptr [[AAA_CASTED]], align 1 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..15, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK7-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK7-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7059,111 +7284,121 @@ // CHECK7-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 
-// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK7-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK7-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 +// CHECK7-NEXT: store i32 [[TMP12]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK7-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK7-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK7-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK7: omp.precond.then: // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK7-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK7-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK7-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK7-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK7-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK7-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK7-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // 
CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK7-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK7-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK7-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK7-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK7-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] 
+// CHECK7-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK7-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK7-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK7-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK7-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK7-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK7-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK7-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK7-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK7-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: @@ -7178,30 +7413,31 @@ // CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK7-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..18, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR3]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] 
= alloca i32, align 4 @@ -7211,68 +7447,74 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP6]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK7-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK7-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK7: .omp.final.then: // CHECK7-NEXT: store i32 10, ptr [[I]], align 4 // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9425,7 +9667,7 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, 
align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9433,19 +9675,20 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9455,53 +9698,57 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK17-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9513,21 +9760,22 @@ // CHECK17-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9537,58 +9785,62 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] 
= load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP18]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK17-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP18]] +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK17-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9601,27 +9853,27 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8 
-// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9631,62 +9883,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 
// CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK17-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9706,7 +9964,7 @@ // CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -9724,27 +9982,36 @@ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9754,107 +10021,111 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// 
CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK17-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK17-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK17-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK17-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds float, ptr [[TMP8]], i64 3 +// CHECK17-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK17-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK17-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK17-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK17-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK17-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK17-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// 
CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK17-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK17-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK17-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK17-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK17-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK17-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK17-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[X:%.*]] = getelementptr 
inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK17-NEXT: store i64 [[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK17-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK17-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK17-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK17-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK17-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9870,42 +10141,41 @@ // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK17-NEXT: store i8 [[TMP7]], ptr 
[[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK17-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK17-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9919,111 +10189,121 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK17-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK17-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK17-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK17-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK17-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK17-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK17-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK17-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// 
CHECK17-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK17-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK17-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK17-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK17-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK17-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK17-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: 
[[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK17-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK17-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK17-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK17-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK17-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK17-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK17-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK17-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK17-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK17-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK17-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK17-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: @@ -10040,7 +10320,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10050,23 +10330,28 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10076,75 +10361,79 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[CMP1:%.*]] = 
icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK17-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] -// CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// 
CHECK17-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: store double [[ADD2]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK17-NEXT: store double [[INC]], ptr [[A3]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK17-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP21]] +// CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK17-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10158,30 +10447,31 @@ // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10191,68 +10481,74 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP3]], 9 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP12]], [[TMP13]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP33]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10266,7 +10562,7 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -10274,19 +10570,20 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10296,53 +10593,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK19-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10354,21 +10655,22 @@ // CHECK19-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10378,58 +10680,62 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK19-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK19-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10442,27 +10748,27 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10472,62 +10778,68 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// 
CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK19-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10547,7 +10859,7 @@ // CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10565,27 +10877,36 @@ // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10595,107 +10916,111 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// 
CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK19-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK19-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK19-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds float, ptr [[TMP8]], i32 3 +// CHECK19-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK19-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK19-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK19-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK19-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK19-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// 
CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK19-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK19-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK19-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK19-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK19-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK19-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[X:%.*]] = getelementptr 
inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK19-NEXT: store i64 [[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK19-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK19-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK19-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK19-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK19-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10711,42 +11036,41 @@ // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK19-NEXT: store i8 [[TMP7]], 
ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK19-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK19-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10760,111 +11084,121 @@ // CHECK19-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK19-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK19-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK19-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK19-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK19-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK19-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK19-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// 
CHECK19-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK19-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK19-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK19-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK19-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK19-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK19-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: 
[[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK19-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK19-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK19-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK19-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK19-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK19-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK19-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK19-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK19-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK19-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK19-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK19-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: @@ -10881,7 +11215,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -10891,23 +11225,28 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10917,75 +11256,79 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[CMP1:%.*]] = 
icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double -// CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] -// CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// 
CHECK19-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: store double [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load double, ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 +// CHECK19-NEXT: store double [[INC]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// CHECK19-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP21]] +// CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[ARRAYIDX5]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK19-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10999,30 +11342,31 @@ // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11032,68 +11376,74 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP3]], 9 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP12]], [[TMP13]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP34]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11107,7 +11457,7 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // 
CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11115,19 +11465,20 @@ // CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK21-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i64 [[TMP4]]) +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11137,53 +11488,57 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK21-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK21-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11195,21 +11550,22 @@ // CHECK21-SAME: (i64 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11219,58 +11575,62 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK21-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK21-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP18]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK21-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK21-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11283,27 +11643,27 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr 
[[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i64 [[TMP1]], i64 [[TMP3]]) +// CHECK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11313,62 +11673,68 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP21]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// 
CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK21-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK21-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11388,7 +11754,7 @@ // CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -11406,27 +11772,36 @@ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i64 [[TMP9]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP4]], i64 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store i64 [[TMP4]], ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: store i64 [[TMP5]], ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK21-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK21-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11436,107 +11811,111 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 0 // CHECK21-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// 
CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK21-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK21-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK21-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds float, ptr [[TMP8]], i64 3 +// CHECK21-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK21-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK21-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK21-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK21-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK21-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK21-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 -// CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] -// CHECK21-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] -// CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP24]] -// 
CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK21-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK21-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK21-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK21-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i64 0, i64 1 +// CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP14]] +// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i64 3 +// CHECK21-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK21-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[X:%.*]] = getelementptr 
inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK21-NEXT: store i64 [[ADD15]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK21-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK21-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK21-NEXT: store i8 [[CONV18]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP24]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK21-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK21-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK21-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK21-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK21-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11552,42 +11931,41 @@ // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK21-NEXT: store i8 [[TMP7]], 
ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i64 [[TMP2]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK21-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[N:%.*]], i64 noundef [[AA:%.*]], i64 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK21-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11601,111 +11979,121 @@ // CHECK21-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 -// CHECK21-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK21-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK21-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK21-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK21-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK21-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK21-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK21: omp.precond.then: // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK21-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK21-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK21-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK21-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK21-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK21-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP27]] -// 
CHECK21-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK21-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK21-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK21-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK21-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK21-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK21-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK21-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK21-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: 
[[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK21-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK21-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK21-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK21-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK21-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK21-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK21-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK21-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: -// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK21-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK21-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK21-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK21-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK21-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK21-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK21-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK21-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK21: .omp.final.done: @@ -11723,8 +12111,7 @@ // CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -11735,29 +12122,34 @@ // CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: 
[[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK21-NEXT: store i64 [[TMP1]], ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK21-NEXT: store i64 [[TMP2]], ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK21-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP0]], i64 [[TMP5]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], i64 [[TMP7]]) +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11767,118 +12159,126 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 
-// CHECK21-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 6 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK21-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK21-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK21-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 
-// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] -// CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK21-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD3]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK21-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP24]] +// CHECK21-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 +// CHECK21-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP30]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK21-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK21: omp.inner.for.cond9: -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK21-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK21: omp.inner.for.body11: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK21-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK21-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK21-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK21-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], 
i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD15]], ptr [[A16]], align 8 -// CHECK21-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 8 -// CHECK21-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC18]], ptr [[A17]], align 8 -// CHECK21-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK21-NEXT: [[TMP22:%.*]] = mul nsw i64 1, [[TMP2]] -// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP22]] -// CHECK21-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i64 1 -// CHECK21-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK21: omp.body.continue22: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK21: omp.inner.for.inc23: -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP33:![0-9]+]] -// CHECK21: omp.inner.for.end25: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK21: omp.inner.for.cond8: +// CHECK21-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK21-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK21: omp.inner.for.body10: +// CHECK21-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK21-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// 
CHECK21-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK21-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: store double [[ADD14]], ptr [[A15]], align 8 +// CHECK21-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 8 +// CHECK21-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK21-NEXT: store double [[INC17]], ptr [[A16]], align 8 +// CHECK21-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK21-NEXT: [[TMP31:%.*]] = mul nsw i64 1, [[TMP8]] +// CHECK21-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP31]] +// CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i64 1 +// CHECK21-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK21: omp.body.continue21: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK21: omp.inner.for.inc22: +// CHECK21-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK21-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK21: omp.inner.for.end24: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK21-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK21-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK21-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -11892,30 +12292,31 @@ // CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK21-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i64 [[TMP2]], i64 [[TMP4]], ptr [[TMP0]]) +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK21-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11925,68 +12326,74 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 -// CHECK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP3]], 9 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP12]], [[TMP13]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK21-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK21-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP35]] +// CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 2 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK21-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK21-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK21-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 10, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12000,7 +12407,7 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // 
CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 2 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -12008,19 +12415,20 @@ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK23-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 2 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12030,53 +12438,57 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 -// CHECK23-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK23-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12088,21 +12500,22 @@ // CHECK23-SAME: (i32 noundef [[AA:%.*]]) #[[ATTR0]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 2 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP0]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP1]], ptr [[TMP0]], align 2 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12112,58 +12525,62 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 2 +// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// CHECK23-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK23-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: store i16 [[CONV3]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP19]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP19]] +// CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK23-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK23-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12176,27 +12593,27 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP0]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..2, i32 [[TMP1]], i32 [[TMP3]]) +// CHECK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12206,62 +12623,68 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP22]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// 
CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK23-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK23-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12281,7 +12704,7 @@ // CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12299,27 +12722,36 @@ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @.omp_outlined..3, i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], ptr [[TMP3]], i32 [[TMP4]], i32 [[TMP5]], ptr [[TMP6]], ptr [[TMP7]]) +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK23-NEXT: store ptr [[TMP6]], ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 8 +// CHECK23-NEXT: store ptr [[TMP7]], ptr [[TMP17]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], ptr noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12329,107 +12761,111 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], 
ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4 -// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 6 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 7 +// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 8 +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 0 // CHECK23-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9 +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP21]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// 
CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double -// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP4]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP28:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP28]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.000000e+00 +// CHECK23-NEXT: [[CONV4:%.*]] = fptrunc double [[ADD3]] to float +// CHECK23-NEXT: store float [[CONV4]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds float, ptr [[TMP8]], i32 3 +// CHECK23-NEXT: [[TMP29:%.*]] = load float, ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV6:%.*]] = fpext float [[TMP29]] to double +// CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV6]], 1.000000e+00 // CHECK23-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK23-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double -// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 -// CHECK23-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK23-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 -// CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] -// CHECK23-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] -// CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP25]] -// 
CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK23-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 -// CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 -// CHECK23-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK23-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: store float [[CONV8]], ptr [[ARRAYIDX5]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP10]], i32 0, i32 1 +// CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX9]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD11]], ptr [[ARRAYIDX10]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP14]] +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, ptr [[TMP16]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX12]], i32 3 +// CHECK23-NEXT: [[TMP32:%.*]] = load double, ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[TMP32]], 1.000000e+00 +// CHECK23-NEXT: store double [[ADD14]], ptr [[ARRAYIDX13]], align 8, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[X:%.*]] = getelementptr 
inbounds [[STRUCT_TT:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP33:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP33]], 1 +// CHECK23-NEXT: store i64 [[ADD15]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], ptr [[TMP18]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP34:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[CONV16:%.*]] = sext i8 [[TMP34]] to i32 +// CHECK23-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV16]], 1 +// CHECK23-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK23-NEXT: store i8 [[CONV18]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK23-NEXT: [[ADD19:%.*]] = add nsw i32 [[TMP35]], 1 +// CHECK23-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP9]]) -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK23-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK23-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12445,42 +12881,41 @@ // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 -// CHECK23-NEXT: store i8 [[TMP7]], 
ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined..4, i32 [[TMP2]], i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[AAA_ADDR]], align 1 +// CHECK23-NEXT: store i8 [[TMP8]], ptr [[TMP7]], align 2 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[N:%.*]], i32 noundef [[AA:%.*]], i32 noundef [[AAA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK23-NEXT: [[AAA:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12494,111 +12929,121 @@ // CHECK23-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 -// CHECK23-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]] +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 2 +// CHECK23-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[N]], align 4 +// CHECK23-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB:%.*]] = sub i32 [[TMP13]], [[TMP14]] // CHECK23-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK23-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK23-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 // CHECK23-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: store i32 [[TMP15]], ptr [[I]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK23: omp.precond.then: // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] 
= load i32, ptr [[TMP9]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK23-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB1]], i32 [[TMP20]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[CMP6:%.*]] = icmp ugt i32 [[TMP21]], [[TMP22]] // CHECK23-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 -// CHECK23-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD7:%.*]] = add i32 [[TMP27]], 1 +// CHECK23-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP26]], [[ADD7]] // CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP29]], 1 +// CHECK23-NEXT: [[ADD9:%.*]] = add i32 [[TMP28]], [[MUL]] // CHECK23-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK23-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// 
CHECK23-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK23-NEXT: store i32 [[ADD10]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP31:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP31]] to i32 // CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK23-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 +// CHECK23-NEXT: store i16 [[CONV12]], ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[TMP32:%.*]] = load i8, ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[CONV13:%.*]] = sext i8 [[TMP32]] to i32 // CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK23-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK23-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK23-NEXT: store i8 [[CONV15]], ptr [[AAA]], align 1, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP10]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: 
[[ADD16:%.*]] = add nsw i32 [[TMP33]], 1 // CHECK23-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK23-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 +// CHECK23-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK23-NEXT: [[ADD17:%.*]] = add i32 [[TMP34]], 1 // CHECK23-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK23-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK23-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK23-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: -// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[SUB18:%.*]] = sub i32 [[TMP30]], [[TMP31]] +// CHECK23-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[SUB18:%.*]] = sub i32 [[TMP40]], [[TMP41]] // CHECK23-NEXT: [[SUB19:%.*]] = sub i32 [[SUB18]], 1 // CHECK23-NEXT: [[ADD20:%.*]] = add i32 [[SUB19]], 1 // CHECK23-NEXT: [[DIV21:%.*]] = udiv i32 [[ADD20]], 1 // CHECK23-NEXT: [[MUL22:%.*]] = mul i32 [[DIV21]], 1 -// CHECK23-NEXT: [[ADD23:%.*]] = add i32 [[TMP29]], [[MUL22]] +// CHECK23-NEXT: [[ADD23:%.*]] = add i32 [[TMP39]], [[MUL22]] // CHECK23-NEXT: store i32 [[ADD23]], ptr [[I5]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK23: .omp.final.done: @@ -12616,8 +13061,7 @@ // CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12628,29 +13072,34 @@ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: 
[[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store i32 [[TMP1]], ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK23-NEXT: store ptr [[TMP3]], ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @.omp_outlined..5, ptr [[TMP0]], i32 [[TMP5]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], i32 [[TMP7]]) +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[TMP10]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[C:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12660,118 +13109,126 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 
-// CHECK23-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP12]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 9 +// CHECK23-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double -// CHECK23-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 
-// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 -// CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] -// CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP22]] to double +// CHECK23-NEXT: [[ADD3:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD3]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP23:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP23]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK23-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP24]] +// CHECK23-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 +// CHECK23-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP31]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK23-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK23-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK23: omp.inner.for.cond9: -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK23-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END25:%.*]] -// CHECK23: omp.inner.for.body11: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK23-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[CONV14:%.*]] = sitofp i32 [[TMP20]] to double -// CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[CONV14]], 1.500000e+00 -// CHECK23-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], 
i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD15]], ptr [[A16]], align 4 -// CHECK23-NEXT: [[A17:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, ptr [[A17]], align 4 -// CHECK23-NEXT: [[INC18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC18]], ptr [[A17]], align 4 -// CHECK23-NEXT: [[CONV19:%.*]] = fptosi double [[INC18]] to i16 -// CHECK23-NEXT: [[TMP22:%.*]] = mul nsw i32 1, [[TMP2]] -// CHECK23-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP22]] -// CHECK23-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX20]], i32 1 -// CHECK23-NEXT: store i16 [[CONV19]], ptr [[ARRAYIDX21]], align 2 -// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE22:%.*]] -// CHECK23: omp.body.continue22: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC23:%.*]] -// CHECK23: omp.inner.for.inc23: -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP34:![0-9]+]] -// CHECK23: omp.inner.for.end25: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK23: omp.inner.for.cond8: +// CHECK23-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK23-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END24:%.*]] +// CHECK23: omp.inner.for.body10: +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK23-NEXT: store i32 [[ADD12]], ptr [[I]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[B]], align 4 +// 
CHECK23-NEXT: [[CONV13:%.*]] = sitofp i32 [[TMP29]] to double +// CHECK23-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.500000e+00 +// CHECK23-NEXT: [[A15:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: store double [[ADD14]], ptr [[A15]], align 4 +// CHECK23-NEXT: [[A16:%.*]] = getelementptr inbounds [[STRUCT_S1]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP30:%.*]] = load double, ptr [[A16]], align 4 +// CHECK23-NEXT: [[INC17:%.*]] = fadd double [[TMP30]], 1.000000e+00 +// CHECK23-NEXT: store double [[INC17]], ptr [[A16]], align 4 +// CHECK23-NEXT: [[CONV18:%.*]] = fptosi double [[INC17]] to i16 +// CHECK23-NEXT: [[TMP31:%.*]] = mul nsw i32 1, [[TMP8]] +// CHECK23-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 [[TMP31]] +// CHECK23-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX19]], i32 1 +// CHECK23-NEXT: store i16 [[CONV18]], ptr [[ARRAYIDX20]], align 2 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE21:%.*]] +// CHECK23: omp.body.continue21: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC22:%.*]] +// CHECK23: omp.inner.for.inc22: +// CHECK23-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP32]], 1 +// CHECK23-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK23: omp.inner.for.end24: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK23-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK23-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK23-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12785,30 +13242,31 @@ // CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..6, i32 [[TMP2]], i32 [[TMP4]], ptr [[TMP0]]) +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[AA_ADDR]], align 2 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[AA:%.*]] = alloca i16, align 2 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12818,68 +13276,74 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 -// CHECK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP3]], 9 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 9 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP12]], [[TMP13]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK23-NEXT: store i32 [[ADD2]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[CONV:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK23-NEXT: store i16 [[CONV4]], ptr [[AA]], align 2, !llvm.access.group 
[[ACC_GRP36]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 2 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK23-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK23-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK23-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 10, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -116,34 +116,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr 
[[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// 
CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -151,26 +151,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr 
inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -182,71 +185,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group 
[[ACC_GRP4]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp 
ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -284,34 +289,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], 
align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: 
br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -319,26 +324,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -350,69 +358,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] 
= load i32, ptr [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// 
CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -455,39 +465,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// 
CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -526,37 +536,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -586,10 +596,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -614,91 +624,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr 
null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr 
[[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: 
[[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 
8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -709,8 +719,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -719,173 +728,183 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_6:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J14:%.*]] = alloca i32, align 4 
+// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], 
ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV7:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK9-NEXT: [[CONV10:%.*]] = sext i32 [[DIV9]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV7]], [[CONV10]] -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB11]], ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[CMP12:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 -// CHECK9-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_6]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, 
!llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK9-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP19]], [[CONV20]] -// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP22]], [[CONV27]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK9-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK9-NEXT: [[SUB34:%.*]] = sub nsw 
i64 [[TMP21]], [[MUL33]] -// CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX39]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, 
!llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 
-// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store 
i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 // CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 // CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 // CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] -// CHECK9-NEXT: store i32 [[ADD44]], ptr [[I13]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4 -// CHECK9-NEXT: [[SUB45:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK9-NEXT: [[DIV46:%.*]] = sdiv i32 [[SUB45]], 1 -// CHECK9-NEXT: [[MUL47:%.*]] = mul nsw i32 [[DIV46]], 1 -// CHECK9-NEXT: [[ADD48:%.*]] = add nsw i32 0, [[MUL47]] -// CHECK9-NEXT: store i32 [[ADD48]], ptr [[J14]], align 4 +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -906,34 +925,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// 
CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], 
align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, 
ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -945,18 +964,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -968,70 +990,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1092,91 +1116,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: 
[[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // 
CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] 
= getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l80 @@ -1187,8 +1211,7 @@ // 
CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1197,171 +1220,181 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., i32 [[TMP4]], i32 [[TMP6]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[M_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[M:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[J12:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[M]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP16]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group 
[[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP19]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP22]], [[CONV25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 
[[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP21]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = mul nsw i32 [[TMP25]], [[TMP1]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], 
align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP28]], 1 -// CHECK11-NEXT: store i64 
[[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP33]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 -// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP34]], 0 -// CHECK11-NEXT: 
[[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1382,34 +1415,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: 
store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: 
store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l67(ptr [[A]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1421,18 +1454,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1444,68 +1480,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -1591,13 +1629,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ 
-1606,16 +1644,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1625,22 +1663,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1685,38 +1723,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] 
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -1792,13 +1830,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, 
!llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -1807,16 +1845,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = 
sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1826,20 +1864,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], 
align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1884,36 +1922,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -263,18 +263,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -284,59 +287,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -348,18 +353,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -369,59 +377,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// 
CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -433,18 +443,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -454,76 +467,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -679,18 +694,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -700,58 +718,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -763,18 +783,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -784,58 +807,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -847,18 +872,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -868,75 +896,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1461,27 +1491,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1494,81 +1527,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1586,27 +1623,30 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1619,81 +1659,85 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw 
i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1712,33 +1756,35 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i64 [[TMP3]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP5]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1751,100 +1797,106 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[N_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], 
label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -1983,18 +2035,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2004,58 +2059,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // 
CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2067,18 +2124,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2088,58 +2148,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // 
CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2151,18 +2213,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2172,75 +2237,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) 
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2520,27 +2587,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2553,80 +2623,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, 
align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// 
CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2644,27 +2718,30 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2677,80 +2754,84 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, 
align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP16]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: 
[[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2769,33 +2850,35 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, i32 [[TMP3]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[N:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2808,99 +2891,105 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // 
CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: 
omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// 
CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP29]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -3039,18 +3128,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3060,57 +3152,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] 
-// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3122,18 +3216,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3143,57 +3240,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] 
-// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3205,18 +3304,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3226,74 +3328,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // 
CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -339,8 +339,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -349,145 +348,155 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// 
CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -694,7 +703,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -703,23 +712,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -727,119 +740,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] 
= alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// 
CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], 
ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1138,8 +1155,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1148,143 +1164,153 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1491,7 +1517,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1500,23 +1526,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1524,117 +1554,121 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: 
[[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void 
@_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], 
[[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2503,35 +2537,33 @@ // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[_TMP1:%.*]] = alloca i32, align 4 @@ -2543,68 +2575,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, 
!llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -160,41 +160,39 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store double [[TMP0]], ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load volatile double, ptr [[TMP2]], align 8 -// CHECK1-NEXT: store double [[TMP3]], ptr 
[[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK1-NEXT: store float [[TMP7]], ptr [[SFVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]], i64 [[TMP8]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store double [[TMP1]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load volatile double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SVAR:%.*]], i64 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -211,91 +209,101 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store double [[TMP2]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store double [[TMP4]], ptr [[G1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK1-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK1-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], 
i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP20]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[TMP21]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], ptr [[SVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP24]], ptr [[SFVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G2]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], ptr 
[[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR5]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR6]], align 4 +// CHECK1-NEXT: store float [[TMP33]], ptr [[SFVAR]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -334,8 +342,7 @@ // CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -343,26 +350,29 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store float [[TMP5]], ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[TMP0]], ptr [[TMP2]], i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], i32 noundef [[SVAR:%.*]], i32 noundef [[SFVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -370,102 +380,108 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR4:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca 
[[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load float, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store float [[TMP8]], ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR3]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR4]], ptr [[TMP23]], align 4, 
!llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[SFVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR3]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], ptr [[SVAR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR4]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[SFVAR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -659,8 +675,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -670,27 +685,31 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -699,28 +718,34 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: 
[[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SVAR]], align 4 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -730,106 +755,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP14]], i64 4, i1 false), 
!llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP22]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] 
-// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done10: +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[SVAR]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], 
[[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done12: // CHECK9-NEXT: ret void // // @@ -1005,7 +1030,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1014,23 +1039,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1039,26 +1068,30 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, 
align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], 
i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1068,104 +1101,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: 
cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr 
inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // 
CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // 
CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1365,8 +1398,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: 
[[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1376,27 +1408,31 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SVAR:%.*]]) #[[ATTR4:[0-9]+]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1405,28 +1441,34 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// 
CHECK11-NEXT: [[_TMP3:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr 
[[SVAR]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1436,104 +1478,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: 
[[TMP22:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP22]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], 
ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[SVAR_ADDR]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], 
[[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done9: +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR4]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[SVAR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done11: // CHECK11-NEXT: ret void // // @@ -1709,7 +1751,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1718,23 +1760,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1743,26 +1789,30 @@ // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: 
[[_TMP3:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1772,102 +1822,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // 
CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// 
CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP14]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP3]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr 
[[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR2]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: 
[[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done8: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP3]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: 
.omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR5]] +// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done10: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp --- 
a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -271,15 +271,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -294,6 +296,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -309,75 +313,75 @@ // CHECK1-NEXT: br i1 
[[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: 
[[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // 
CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -474,15 +478,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -498,6 +504,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr 
[[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -515,72 +523,72 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -804,15 +812,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -827,6 +837,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 
4 @@ -842,73 +854,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1005,15 +1017,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1029,6 +1043,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1046,70 +1062,70 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2059,15 +2075,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l74 // CHECK9-SAME: () #[[ATTR5:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2083,66 +2101,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store 
i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -89,34 +89,34 @@ // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// 
CHECK1-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// 
CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -129,19 +129,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -152,81 +155,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// 
CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -240,15 +245,15 @@ // 
CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -263,36 +268,36 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr 
@.offload_sizes.3, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store 
ptr @.offload_sizes.3, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -304,19 +309,22 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -327,81 +335,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 
0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -415,15 +425,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -445,34 +455,34 @@ // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr @_ZZ4mainE5sivar, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr 
[[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr 
[[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l60(ptr @_ZZ4mainE5sivar) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -485,19 +495,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -508,81 +521,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 
-// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], 
ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -596,15 +611,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -619,36 +634,36 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 
// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(ptr [[T_VAR]]) #[[ATTR3]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -660,19 +675,22 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -683,81 +701,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = 
phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] 
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // 
CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: 
.omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -771,15 +791,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// 
CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -808,26 +828,26 @@ // CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 @@ -854,37 +874,37 @@ // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: 
omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK5-NEXT: ret i32 0 // @@ -907,26 +927,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -953,37 +973,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[CMP:%.*]] = 
icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 
4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1002,19 +1022,22 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1026,84 +1049,86 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // 
CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// 
CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -1117,15 +1142,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -86,40 +86,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr 
[[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: 
store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -130,19 +130,23 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -156,40 +160,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// 
CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// 
CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -202,33 +206,39 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 
8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP5]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK1-NEXT: ret void // // @@ -242,40 +252,40 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, 
i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -288,60 +298,66 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr 
[[Y2]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK1-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 
[[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -355,23 +371,23 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK1-NEXT: ret void // // @@ -384,34 +400,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -423,25 +439,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -454,34 +475,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -493,25 +514,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -524,34 +550,34 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK1-NEXT: store ptr [[X]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -563,25 +589,30 @@ // CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 
4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK1-NEXT: ret void // // @@ -599,92 +630,92 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], 
align 8 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], 
align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP16]], ptr 
[[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr 
[[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 8 -// 
CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.22, 
ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK1: omp_offload.failed5: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -697,84 +728,90 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..16, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store 
ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq 
ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK1: omp.arraycpy.body8: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK1: 
omp.arraycpy.body6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK1: omp.arraycpy.done12: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -789,24 +826,24 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// 
CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -817,84 +854,90 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..20, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK1-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK1-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK1-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK1-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// 
CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK1: omp.arraycpy.body8: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK1: omp.arraycpy.done14: +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK1: omp.arraycpy.body6: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = 
atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK1: omp.arraycpy.done12: // CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: @@ -909,24 +952,24 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 
[[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -946,92 +989,92 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[Y]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP20]], align 8 -// CHECK1-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.26, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.27, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP32]], 
align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[X]], ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 -// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[Y]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK1-NEXT: 
store ptr [[Z]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[X]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 2 +// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK1-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK1-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK1-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.30, ptr [[TMP37]], align 8 +// CHECK1-NEXT: 
[[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.31, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK1-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK1: omp_offload.failed5: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -1044,68 +1087,74 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..24, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK1-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.25, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store 
i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1119,15 +1168,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK1-NEXT: ret void // // @@ -1136,68 +1185,74 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..28, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..28, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..28 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK1-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK1-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK1-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK1-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK1-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK1-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.29, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) +// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK1-NEXT: store 
i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK1-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) +// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK1-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK1-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -1211,15 +1266,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK1-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK1-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK1-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK1-NEXT: ret void // // @@ -1240,40 +1295,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr 
[[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store 
ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27() #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1284,19 +1339,23 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1310,40 +1369,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// 
CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.2, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1356,33 +1415,39 @@ // CHECK3-NEXT: 
entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP5]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK3-NEXT: ret void // // @@ -1396,40 +1461,40 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP11]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, 
i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39(ptr [[X]], ptr [[Y]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1442,60 +1507,66 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..4, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr 
[[Y2]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 
[[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1509,23 +1580,23 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 
1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK3-NEXT: ret void // // @@ -1538,34 +1609,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// 
CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.8, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1577,25 +1648,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1608,34 +1684,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.11, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1647,25 +1723,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1678,34 +1759,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK3-NEXT: store ptr [[X]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.14, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.15, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57(ptr [[X]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1717,25 +1798,30 @@ // CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], 
align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK3-NEXT: ret void // // @@ -1753,92 +1839,92 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [3 x 
ptr], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[Y]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr 
[[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP16]], 
ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP25]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store 
ptr [[TMP10]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.18, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.19, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP30]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 -// 
CHECK3-NEXT: store ptr null, ptr [[TMP32]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP38]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[X]], ptr [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2 -// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Y]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_PTRS2]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[X]], ptr [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 2 +// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK3-NEXT: store i32 3, ptr [[TMP46]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP43]], ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP44]], ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.22, ptr [[TMP49]], align 4 -// 
CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP53]], align 8 -// CHECK3-NEXT: [[TMP54:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK3-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK3-NEXT: br i1 [[TMP55]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CHECK3-NEXT: store i32 3, ptr [[TMP34]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP35]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.22, 
ptr [[TMP37]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.23, ptr [[TMP38]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP39]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP41]], align 8 +// CHECK3-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.region_id, ptr [[KERNEL_ARGS4]]) +// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK3-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] // CHECK3: omp_offload.failed5: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65(ptr [[Y]], ptr [[Z]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT6]] @@ -1851,84 +1937,90 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..16, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store 
ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// 
CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.17, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label 
[[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq 
ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: 
omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -1943,24 +2035,24 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// 
CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// 
CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done2: // CHECK3-NEXT: ret void @@ -1971,84 +2063,90 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..20, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..20, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..20 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK3-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK3-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK3-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK3-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK3: omp.arrayinit.body: // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK3-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK3: omp.arrayinit.done: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.21, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// 
CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done4: // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK3-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK3: omp.arraycpy.body8: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK3: omp.arraycpy.done14: +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK3: omp.arraycpy.body6: +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = 
atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK3: omp.arraycpy.done12: // CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: @@ -2063,24 +2161,24 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK3-NEXT: br 
i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK3: omp.arraycpy.done2: // CHECK3-NEXT: ret void @@ -2096,19 +2194,23 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // 
CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: ret void // // @@ -2117,33 +2219,39 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i64 [[TMP3]], i64 [[TMP5]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]], i64 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: store i64 [[Y]], ptr [[Y_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK5-NEXT: ret void // // @@ -2152,60 +2260,66 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = 
alloca [2 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK5-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[X1]], ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK5-NEXT: store ptr [[Y2]], ptr [[TMP4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[X]], ptr [[TMP5]], align 8 +// 
CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK5-NEXT: store ptr [[Y]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2219,23 +2333,23 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK5-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK5-NEXT: ret void // // @@ -2243,25 +2357,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store ptr 
[[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2269,25 +2388,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2295,25 +2419,30 @@ // CHECK5-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 
4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK5-NEXT: ret void // // @@ -2322,84 +2451,90 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 
-// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: // CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], 
[[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: // CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: @@ -2414,24 +2549,24 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done2: // CHECK5-NEXT: ret void @@ -2442,84 +2577,90 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK5-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK5-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK5-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i64 352, i1 false) -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 -// CHECK5-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i64 352, i1 false) +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK5: omp.arrayinit.body: // CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK5-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK5: omp.arrayinit.done: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// 
CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK5: omp.arraycpy.done6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK5: omp.arraycpy.done4: // CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i64 99 -// CHECK5-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK5: omp.arraycpy.body8: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK5: omp.arraycpy.done14: +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK5: omp.arraycpy.body6: +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = 
atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK5: omp.arraycpy.done12: // CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: @@ -2534,24 +2675,24 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i64 99 -// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 99 +// CHECK5-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK5-NEXT: br 
i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK5: omp.arraycpy.body: -// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK5-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK5-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK5-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK5-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK5: omp.arraycpy.done2: // CHECK5-NEXT: ret void @@ -2562,68 +2703,74 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = 
alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..10, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 
+// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK5-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.11, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef 
[[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6:[0-9]+]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2637,15 +2784,15 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK5-NEXT: ret void // // @@ -2654,68 +2801,74 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..12, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Y:%.*]], ptr noundef nonnull align 16 dereferenceable(16) [[Z:%.*]]) #[[ATTR0]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[Y1:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[Y:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[X:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[Z2:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[Z:%.*]] = alloca i128, align 16 // CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK5-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i128, align 16 -// CHECK5-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i128, align 16 +// CHECK5-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i128, align 16 // 
CHECK5-NEXT: [[TMP:%.*]] = alloca i128, align 16 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i128, ptr [[TMP0]], align 16 -// CHECK5-NEXT: store i128 [[TMP2]], ptr [[Y1]], align 16 -// CHECK5-NEXT: store i128 0, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK5-NEXT: store ptr [[Z2]], ptr [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP6]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) -// CHECK5-NEXT: switch i32 [[TMP8]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i128, ptr [[TMP2]], align 16 +// CHECK5-NEXT: store i128 [[TMP5]], ptr [[Y]], align 16 +// CHECK5-NEXT: store i128 0, ptr [[Z]], align 16 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK5-NEXT: store ptr [[Z]], ptr [[TMP6]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.13, ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK5-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK5-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK5-NEXT: ] // CHECK5: .omp.reduction.case1: -// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP1]], align 16 -// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP9]], [[TMP10]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP1]], align 16 -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: [[TMP10:%.*]] = load i128, ptr [[TMP4]], align 16 +// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP4]], align 16 +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.case2: -// CHECK5-NEXT: [[TMP11:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br label [[ATOMIC_CONT:%.*]] 
// CHECK5: atomic_cont: -// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 -// CHECK5-NEXT: store i128 [[TMP14]], ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[TMP]], align 16 -// CHECK5-NEXT: [[TMP16:%.*]] = load i128, ptr [[Z2]], align 16 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i128 [[TMP15]], [[TMP16]] -// CHECK5-NEXT: store i128 [[ADD4]], ptr [[ATOMIC_TEMP3]], align 16 -// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP1]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP3]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[ATOMIC_TEMP]], align 16 +// CHECK5-NEXT: store i128 [[TMP13]], ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP14:%.*]] = load i128, ptr [[TMP]], align 16 +// CHECK5-NEXT: [[TMP15:%.*]] = load i128, ptr [[Z]], align 16 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i128 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i128 [[ADD2]], ptr [[ATOMIC_TEMP1]], align 16 +// CHECK5-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP4]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef signext 0, i32 noundef signext 0) #[[ATTR6]] // CHECK5-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK5: atomic_exit: -// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP6]], ptr @.gomp_critical_user_.reduction.var) +// CHECK5-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK5: .omp.reduction.default: // CHECK5-NEXT: ret void @@ -2729,34 +2882,38 @@ // CHECK5-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK5-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = 
load ptr, ptr [[DOTADDR1]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP10]], align 16 -// CHECK5-NEXT: [[TMP13:%.*]] = load i128, ptr [[TMP7]], align 16 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP10]], align 16 +// CHECK5-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP7]], align 16 +// CHECK5-NEXT: [[TMP9:%.*]] = load i128, ptr [[TMP5]], align 16 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i128 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: store i128 [[ADD]], ptr [[TMP7]], align 16 // CHECK5-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27 // CHECK7-SAME: () #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: ret void // // @@ -2765,33 +2922,39 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: store i32 [[TMP4]], ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..1, i32 [[TMP3]], i32 [[TMP5]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[Y]], align 4 // CHECK7-NEXT: ret void // // @@ -2800,60 +2963,66 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[Y:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X1:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[Y2:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store i32 0, ptr [[X1]], align 4 -// CHECK7-NEXT: store i32 0, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[X1]], ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK7-NEXT: store ptr 
[[Y2]], ptr [[TMP4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: store i32 0, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 0, ptr [[Y]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[X]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[Y]], ptr [[TMP6]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 2, i32 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X1]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[X]], align 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[TMP4]], align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[X1]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP14]] monotonic, align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y2]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP16]] monotonic, align 4 -// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP7]], ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = atomicrmw add ptr [[TMP4]], i32 [[TMP16]] monotonic, align 4 +// CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 
[[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: // CHECK7-NEXT: ret void @@ -2867,23 +3036,23 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP16]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 
1 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP2]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[TMP11]], align 4 // CHECK7-NEXT: ret void // // @@ -2891,25 +3060,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2917,25 +3091,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 
4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2943,25 +3122,30 @@ // CHECK7-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[X:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, i32 [[TMP2]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[X:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// 
CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X]], align 4 // CHECK7-NEXT: ret void // // @@ -2970,84 +3154,90 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..6, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: 
[[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.7, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// 
CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: // CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: 
[[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = 
atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: // CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: @@ -3062,24 +3252,24 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK7-NEXT: br 
i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done2: // CHECK7-NEXT: ret void @@ -3090,84 +3280,90 @@ // CHECK7-NEXT: entry: // CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[Z_ADDR:%.*]] = 
alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..8, ptr [[TMP0]], ptr [[TMP1]]) +// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(352) [[Y:%.*]], ptr noundef nonnull align 4 dereferenceable(396) [[Z:%.*]]) #[[ATTR0]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Z_ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[Y1:%.*]] = alloca [88 x i32], align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[Y:%.*]] = alloca [88 x i32], align 4 // CHECK7-NEXT: [[X:%.*]] = alloca [77 x i32], align 4 -// CHECK7-NEXT: [[Z2:%.*]] = alloca [99 
x i32], align 4 +// CHECK7-NEXT: [[Z:%.*]] = alloca [99 x i32], align 4 // CHECK7-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Y]], ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: store ptr [[Z]], ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[Y_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Z_ADDR]], align 4 -// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y1]], ptr align 4 [[TMP0]], i32 352, i1 false) -// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP4]] +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Y]], ptr align 4 [[TMP2]], i32 352, i1 false) +// CHECK7-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [99 x i32], ptr [[Z]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[ARRAY_BEGIN]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK7: omp.arrayinit.body: // CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK7-NEXT: store i32 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK7: omp.arrayinit.done: -// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK7-NEXT: store ptr [[Z2]], ptr [[TMP5]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[Z]], ptr [[TMP6]], align 4 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK7-NEXT: switch i32 [[TMP10]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK7-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP8]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK7-NEXT: switch i32 [[TMP9]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK7-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK7-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK7-NEXT: ] // CHECK7: .omp.reduction.case1: -// CHECK7-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr 
[[TMP1]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE6:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK7-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP4]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST3:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT4:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT4]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST3]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST1:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT2:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT2]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST1]], i32 1 // CHECK7-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE5:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT4]], [[TMP11]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_DONE6]], label [[OMP_ARRAYCPY_BODY]] -// CHECK7: omp.arraycpy.done6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE3:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT2]], [[TMP10]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] +// CHECK7: omp.arraycpy.done4: // CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.case2: -// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP1]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY7:%.*]] = icmp eq ptr [[TMP1]], [[TMP14]] -// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY7]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY8:%.*]] -// CHECK7: omp.arraycpy.body8: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST9:%.*]] = phi ptr [ [[Z2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT12:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST10:%.*]] = phi ptr [ [[TMP1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT11:%.*]], [[OMP_ARRAYCPY_BODY8]] ] -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 [[TMP15]] monotonic, align 4 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT11]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST10]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT12]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST9]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE13:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT11]], [[TMP14]] -// CHECK7-NEXT: br i1 
[[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY8]] -// CHECK7: omp.arraycpy.done14: +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP4]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY5:%.*]] = icmp eq ptr [[TMP4]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY5]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY6:%.*]] +// CHECK7: omp.arraycpy.body6: +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST7:%.*]] = phi ptr [ [[Z]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT10:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST8:%.*]] = phi ptr [ [[TMP4]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT9:%.*]], [[OMP_ARRAYCPY_BODY6]] ] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = atomicrmw add ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 [[TMP14]] monotonic, align 4 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT9]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST8]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT10]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST7]], i32 1 +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE11:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT9]], [[TMP13]] +// CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY6]] +// CHECK7: omp.arraycpy.done12: // CHECK7-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP8]], ptr @.gomp_critical_user_.reduction.var) // CHECK7-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK7: .omp.reduction.default: @@ -3182,24 +3378,24 @@ // CHECK7-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK7-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP4]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 99 -// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP10]], [[TMP12]] +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 99 +// CHECK7-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP7]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK7: omp.arraycpy.body: -// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 4 // CHECK7-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK7-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i32, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP12]] +// CHECK7-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK7-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK7: omp.arraycpy.done2: // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -531,7 +531,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -539,29 +539,35 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], 
align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -570,25 +576,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -597,21 +608,25 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -619,40 +634,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void 
@__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -662,8 +685,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -671,32 +693,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1064,7 +1093,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1072,29 +1101,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, 
i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1103,25 +1138,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1130,21 +1170,25 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1152,40 +1196,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void 
@__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1195,8 +1247,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1204,32 +1255,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1244,21 +1302,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 
[[TMP1]], i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1266,21 +1328,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1290,7 +1356,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1298,29 +1364,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, 
ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1329,25 +1401,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1355,19 +1432,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1377,8 +1458,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1386,32 +1466,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: 
[[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1419,21 +1506,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1441,21 +1532,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1465,7 +1560,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1473,29 +1568,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 
-// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1504,25 +1605,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1530,19 
+1636,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1552,8 +1662,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1561,31 +1670,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -546,7 +546,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -554,29 +554,35 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i64 [[TMP4]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -585,25 +591,30 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK1-NEXT: ret void // @@ -613,23 +624,27 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -637,40 +652,48 @@ // CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: 
store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: ret void // // @@ -680,8 +703,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -689,32 +711,39 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK1-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK1-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK1-NEXT: ret void // // @@ -1097,7 +1126,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1105,29 +1134,35 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined., ptr [[TMP1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1136,25 +1171,30 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK3-NEXT: ret void // @@ -1164,23 +1204,27 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1188,40 +1232,48 @@ // CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: 
store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..7) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..10) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: ret void // // @@ -1231,8 +1283,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1240,32 +1291,39 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK3-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..11, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK3-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK3-NEXT: ret void // // @@ -1281,23 +1339,27 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1305,21 +1367,25 @@ // CHECK9-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1329,7 +1395,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1337,29 +1403,35 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, 
ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1368,25 +1440,30 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 // CHECK9-NEXT: ret void // @@ -1394,19 +1471,23 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr 
@[[GLOB1]]) // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1416,8 +1497,7 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -1425,32 +1505,39 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK9-NEXT: 
[[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // // @@ -1459,23 +1546,27 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_1]], ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1483,21 +1574,25 @@ // CHECK11-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1507,7 +1602,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1515,29 +1610,35 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..2, ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4 
-// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double // CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1546,25 +1647,30 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1572,19 
+1678,23 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: ret void // // @@ -1594,8 +1704,7 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -1603,31 +1712,38 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..5, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[B_ADDR]], align 2 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B:%.*]] = alloca i16, align 2 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], 
i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CONV]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -1226,44 +1226,49 @@ // CHECK1-SAME: () #[[ATTR8]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[A]]) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK1-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]]) -// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) +// CHECK1-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP8]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK1-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK1-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -1275,7 +1280,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK1-NEXT: ret void @@ -1297,11 +1302,11 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], 
align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -2346,44 +2351,49 @@ // CHECK2-SAME: () #[[ATTR8]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[A]]) +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..24, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..24 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_23:%.*]], align 8 +// CHECK2-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_23]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK2-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]]) -// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..27) +// CHECK2-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK2-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK2-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: -// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-NEXT: ret void // // @@ -2395,7 +2405,7 @@ // CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_25:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_26:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-NEXT: ret void @@ -2417,11 +2427,11 @@ // CHECK2-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], 
align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_25]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) @@ -3548,44 +3558,49 @@ // CHECK2-51-SAME: () #[[ATTR8]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[A:%.*]] = alloca float, align 4 -// CHECK2-51-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[A]]) +// CHECK2-51-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-51-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 +// CHECK2-51-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-51-NEXT: ret void // // // CHECK2-51-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK2-51-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR9:[0-9]+]] { +// CHECK2-51-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR9:[0-9]+]] { // CHECK2-51-NEXT: entry: // CHECK2-51-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-51-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-51-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-51-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-51-NEXT: [[B:%.*]] = alloca double, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_25:%.*]], align 8 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_26:%.*]], align 8 // CHECK2-51-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-51-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-51-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-51-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-51-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 -// CHECK2-51-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK2-51-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_25:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-51-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-51-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_single(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-51-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK2-51-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2-51: omp_if.then: -// CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_25]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK2-51-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..29) -// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK2-51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP9]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK2-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], ptr [[TMP6]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], ptr [[TMP10]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP12:%.*]] = load double, ptr [[B]], align 8 -// CHECK2-51-NEXT: store double [[TMP12]], ptr [[TMP11]], align 8 -// CHECK2-51-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP6]]) -// CHECK2-51-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-51-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_26]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK2-51-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK2-51-NEXT: 
[[TMP8:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i64 48, i64 8, ptr @.omp_task_entry..29) +// CHECK2-51-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], ptr [[TMP8]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP9]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK2-51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP11]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK2-51-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], ptr [[TMP8]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP14:%.*]] = load double, ptr [[B]], align 8 +// CHECK2-51-NEXT: store double [[TMP14]], ptr [[TMP13]], align 8 +// CHECK2-51-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP4]], ptr [[TMP8]]) +// CHECK2-51-NEXT: call void @__kmpc_end_single(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK2-51-NEXT: br label [[OMP_IF_END]] // CHECK2-51: omp_if.end: -// CHECK2-51-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-51-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]]) // CHECK2-51-NEXT: ret void // // @@ -3597,7 +3612,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_27:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_28:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // 
CHECK2-51-NEXT: ret void @@ -3619,11 +3634,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_26]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_27]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META136:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) @@ -3657,26 +3672,26 @@ // CHECK2-51-NEXT: [[C:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK2-51-NEXT: [[E:%.*]] = alloca i32, align 4 -// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_28:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_29:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_31:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_32:%.*]], align 1 
// CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR2:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR3:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_34:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED4:%.*]] = alloca [[STRUCT_ANON_35:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR5:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR6:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_37:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED7:%.*]] = alloca [[STRUCT_ANON_38:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR8:%.*]] = alloca [2 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR9:%.*]] = alloca i64, align 8 -// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_40:%.*]], align 1 +// CHECK2-51-NEXT: [[AGG_CAPTURED10:%.*]] = alloca [[STRUCT_ANON_41:%.*]], align 1 // CHECK2-51-NEXT: [[DOTDEP_ARR_ADDR11:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK2-51-NEXT: [[DEP_COUNTER_ADDR12:%.*]] = alloca i64, align 8 // CHECK2-51-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK2-51-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..32) -// CHECK2-51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], ptr [[TMP1]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], ptr [[TMP1]], i32 0, i32 1 +// 
CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 @@ -3698,9 +3713,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR]], align 8 // CHECK2-51-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]], i32 2, ptr [[TMP6]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..35) -// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], ptr [[TMP17]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], ptr [[TMP17]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], ptr [[TMP17]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], ptr [[TMP17]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], ptr [[TMP19]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP21:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 // CHECK2-51-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR2]], i64 0, i64 0 @@ -3722,9 +3737,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR3]], align 8 // CHECK2-51-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP17]], i32 2, ptr [[TMP22]], i32 
0, ptr null) // CHECK2-51-NEXT: [[TMP33:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..38) -// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], ptr [[TMP33]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], ptr [[TMP33]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], ptr [[TMP35]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], ptr [[TMP33]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], ptr [[TMP33]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_37:%.*]], ptr [[TMP35]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP37:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP37]], ptr [[TMP36]], align 8 // CHECK2-51-NEXT: [[TMP38:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR5]], i64 0, i64 0 @@ -3746,9 +3761,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR6]], align 8 // CHECK2-51-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP33]], i32 2, ptr [[TMP38]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..41) -// CHECK2-51-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], ptr [[TMP49]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], ptr [[TMP49]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], ptr [[TMP51]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP50:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], ptr [[TMP49]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], ptr [[TMP49]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], ptr [[TMP51]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP53:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP53]], ptr [[TMP52]], align 8 // CHECK2-51-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR8]], i64 0, i64 0 @@ -3770,9 +3785,9 @@ // CHECK2-51-NEXT: store i64 2, ptr [[DEP_COUNTER_ADDR9]], align 8 // CHECK2-51-NEXT: [[TMP64:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]], i32 2, ptr [[TMP54]], i32 0, ptr null) // CHECK2-51-NEXT: [[TMP65:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry..44) -// CHECK2-51-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], ptr [[TMP65]], i32 0, i32 0 -// CHECK2-51-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], ptr [[TMP65]], i32 0, i32 1 -// CHECK2-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], ptr [[TMP67]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], ptr [[TMP65]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], ptr [[TMP65]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], ptr [[TMP67]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP69:%.*]] = load i32, ptr [[A]], align 4 // CHECK2-51-NEXT: store i32 [[TMP69]], ptr [[TMP68]], align 8 // CHECK2-51-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR11]], i64 0, i64 0 @@ 
-3796,7 +3811,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_30:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_31:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3818,11 +3833,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_29]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_30]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META145:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META150:![0-9]+]]) @@ -3850,7 +3865,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr 
[[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_33:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_34:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3872,11 +3887,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_32]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_33]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META155:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) @@ -3904,7 +3919,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr 
[[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_36:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_37:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3926,11 +3941,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_35]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_36]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META170:![0-9]+]]) @@ -3958,7 +3973,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_39:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_40:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -3980,11 +3995,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_38]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_39]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META180:![0-9]+]]) @@ -4012,7 +4027,7 @@ // CHECK2-51-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT__KMP_PRIVATES_T_42:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_43:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 // CHECK2-51-NEXT: ret void @@ -4034,11 +4049,11 @@ // CHECK2-51-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK2-51-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK2-51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK2-51-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK2-51-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK2-51-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_41]], ptr [[TMP3]], i32 0, i32 1 +// CHECK2-51-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_42]], ptr [[TMP3]], i32 0, i32 1 // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META185:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META188:![0-9]+]]) // CHECK2-51-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META190:![0-9]+]]) diff --git a/clang/test/OpenMP/task_if_codegen.cpp b/clang/test/OpenMP/task_if_codegen.cpp --- a/clang/test/OpenMP/task_if_codegen.cpp +++ b/clang/test/OpenMP/task_if_codegen.cpp @@ -90,25 +90,29 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z9gtid_testv // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP2]], i32 0, i32 0 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i64 40, i64 1, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @.omp_task_entry.(i32 [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP2]], ptr [[TMP3]]) // CHECK1-NEXT: ret void // // @@ -141,7 +145,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void @_Z9gtid_testv() // CHECK1-NEXT: ret i32 0 // @@ -150,62 +154,62 @@ // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
[[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..3) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..5) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..5(i32 [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..7) -// CHECK1-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..5) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @.omp_task_entry..5(i32 [[TMP0]], ptr [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..7) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..7(i32 [[TMP0]], ptr [[TMP9]]) #[[ATTR3]] -// 
CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @.omp_task_entry..7(i32 [[TMP0]], ptr [[TMP7]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..9) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP19]], i32 0, i32 0 -// CHECK1-NEXT: store i64 ptrtoint (ptr @Arg to i64), ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP19]], i32 0, i32 1 -// CHECK1-NEXT: store i64 4, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP19]], i32 0, i32 2 -// CHECK1-NEXT: store i8 3, ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..9) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 0 +// CHECK1-NEXT: store i64 ptrtoint (ptr @Arg to i64), ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 1 +// CHECK1-NEXT: store i64 4, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP15]], i32 0, i32 2 +// CHECK1-NEXT: store i8 3, ptr [[TMP18]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DEP_COUNTER_ADDR]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[TMP19]], 0 // CHECK1-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK1: omp_if.then5: -// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]], i32 1, ptr [[TMP18]], i32 0, ptr null) +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]], i32 1, ptr [[TMP14]], i32 0, ptr null) // CHECK1-NEXT: br label [[OMP_IF_END7:%.*]] // CHECK1: omp_if.else6: -// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP18]], i32 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]]) -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], ptr [[TMP15]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP14]], i32 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]]) +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @.omp_task_entry..9(i32 [[TMP0]], ptr 
[[TMP12]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END7]] // CHECK1: omp_if.end7: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP27]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP22]]) // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -224,7 +228,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -238,7 +242,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -258,7 +262,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_4:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -272,7 +276,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !34 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -292,7 +296,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_5:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_6:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -306,7 +310,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias 
!44 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !44 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -326,7 +330,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_7:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_8:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -340,7 +344,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !54 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -349,115 +353,115 @@ // CHECK1-SAME: (i32 noundef [[ARG:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca 
[[STRUCT_ANON_12:%.*]], align 1 -// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED2:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED3:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED8:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR9:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR10:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 1 +// CHECK1-NEXT: [[AGG_CAPTURED15:%.*]] = alloca [[STRUCT_ANON_19:%.*]], align 1 // CHECK1-NEXT: [[DOTDEP_ARR_ADDR16:%.*]] = alloca [1 x %struct.kmp_depend_info], align 8 // CHECK1-NEXT: [[DEP_COUNTER_ADDR17:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..11) -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..13) -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP5]], i32 0, i32 0 -// 
CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @.omp_task_entry..13(i32 [[TMP0]], ptr [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP5]]) -// CHECK1-NEXT: [[TMP9:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..15) -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], ptr [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..13) +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @.omp_task_entry..13(i32 [[TMP0]], ptr [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP4]]) +// CHECK1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..15) +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], ptr [[TMP7]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP9]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// 
CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @.omp_task_entry..15(i32 [[TMP0]], ptr [[TMP9]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP9]]) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @.omp_task_entry..15(i32 [[TMP0]], ptr [[TMP7]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..17) -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], ptr [[TMP15]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP19:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP18]], i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 1 -// CHECK1-NEXT: store i64 4, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP20]], i32 0, i32 2 -// CHECK1-NEXT: store i8 1, ptr [[TMP23]], align 8 
+// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..17) +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], ptr [[TMP12]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO:%.*]], ptr [[TMP14]], i64 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 1 +// CHECK1-NEXT: store i64 4, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP16]], i32 0, i32 2 +// CHECK1-NEXT: store i8 1, ptr [[TMP19]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DEP_COUNTER_ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL4:%.*]] = icmp ne i32 [[TMP20]], 0 // CHECK1-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK1: omp_if.then5: -// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]], i32 1, ptr [[TMP18]], i32 0, ptr null) +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]], i32 1, ptr [[TMP14]], i32 0, ptr null) // CHECK1-NEXT: br label [[OMP_IF_END7:%.*]] // CHECK1: omp_if.else6: -// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP18]], i32 0, ptr null) -// 
CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]]) -// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @.omp_task_entry..17(i32 [[TMP0]], ptr [[TMP15]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP15]]) +// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP14]], i32 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]]) +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @.omp_task_entry..17(i32 [[TMP0]], ptr [[TMP12]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP12]]) // CHECK1-NEXT: br label [[OMP_IF_END7]] // CHECK1: omp_if.end7: -// CHECK1-NEXT: [[TMP28:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..19) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], ptr [[TMP28]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP32:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP31]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP33]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP32]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP33]], i32 0, i32 1 -// CHECK1-NEXT: store i64 4, ptr [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP33]], i32 0, i32 2 -// CHECK1-NEXT: store i8 3, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..19) +// 
CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], ptr [[TMP23]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR9]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP25]], i64 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP27]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP26]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP27]], i32 0, i32 1 +// CHECK1-NEXT: store i64 4, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP27]], i32 0, i32 2 +// CHECK1-NEXT: store i8 3, ptr [[TMP30]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DEP_COUNTER_ADDR10]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[TMP38]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL11:%.*]] = icmp ne i32 [[TMP31]], 0 // CHECK1-NEXT: br i1 [[TOBOOL11]], label [[OMP_IF_THEN12:%.*]], label [[OMP_IF_ELSE13:%.*]] // CHECK1: omp_if.then12: -// CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP28]], i32 1, ptr [[TMP31]], i32 0, ptr null) +// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP23]], i32 1, ptr [[TMP25]], i32 0, ptr null) // CHECK1-NEXT: br label [[OMP_IF_END14:%.*]] // CHECK1: omp_if.else13: -// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP31]], i32 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP28]]) -// CHECK1-NEXT: [[TMP40:%.*]] = call i32 
@.omp_task_entry..19(i32 [[TMP0]], ptr [[TMP28]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP28]]) +// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP25]], i32 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP23]]) +// CHECK1-NEXT: [[TMP33:%.*]] = call i32 @.omp_task_entry..19(i32 [[TMP0]], ptr [[TMP23]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP23]]) // CHECK1-NEXT: br label [[OMP_IF_END14]] // CHECK1: omp_if.end14: -// CHECK1-NEXT: [[TMP41:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..21) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], ptr [[TMP41]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP44]], i64 0 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP46]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP46]], i32 0, i32 1 -// CHECK1-NEXT: store i64 4, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP46]], i32 0, i32 2 -// CHECK1-NEXT: store i8 3, ptr [[TMP49]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 1, ptr @.omp_task_entry..21) +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], ptr [[TMP34]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x %struct.kmp_depend_info], ptr [[DOTDEP_ARR_ADDR16]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP37:%.*]] = ptrtoint ptr [[ARG_ADDR]] to i64 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP36]], i64 0 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP38]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP37]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP38]], i32 0, i32 1 +// CHECK1-NEXT: store i64 4, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_DEPEND_INFO]], ptr [[TMP38]], i32 0, i32 2 +// CHECK1-NEXT: store i8 3, ptr [[TMP41]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DEP_COUNTER_ADDR17]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// CHECK1-NEXT: [[TOBOOL18:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL18:%.*]] = icmp ne i32 [[TMP42]], 0 // CHECK1-NEXT: br i1 [[TOBOOL18]], label [[OMP_IF_THEN19:%.*]], label [[OMP_IF_ELSE20:%.*]] // CHECK1: omp_if.then19: -// CHECK1-NEXT: [[TMP52:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP41]], i32 1, ptr [[TMP44]], i32 0, ptr null) +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task_with_deps(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP34]], i32 1, ptr [[TMP36]], i32 0, ptr null) // CHECK1-NEXT: br label [[OMP_IF_END21:%.*]] // CHECK1: omp_if.else20: -// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP44]], i32 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP41]]) -// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @.omp_task_entry..21(i32 [[TMP0]], ptr [[TMP41]]) #[[ATTR3]] -// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr 
@[[GLOB1]], i32 [[TMP0]], ptr [[TMP41]]) +// CHECK1-NEXT: call void @__kmpc_omp_wait_deps(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, ptr [[TMP36]], i32 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_omp_task_begin_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP34]]) +// CHECK1-NEXT: [[TMP44:%.*]] = call i32 @.omp_task_entry..21(i32 [[TMP0]], ptr [[TMP34]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_omp_task_complete_if0(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP34]]) // CHECK1-NEXT: br label [[OMP_IF_END21]] // CHECK1: omp_if.end21: // CHECK1-NEXT: ret i32 0 @@ -478,7 +482,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_9:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_10:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -492,7 +496,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -512,7 +516,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_11:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_12:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -526,7 +530,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !74 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -546,7 +550,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_13:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_14:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -560,7 +564,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: store ptr [[TMP3]], ptr 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !84 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -580,7 +584,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_15:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_16:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -594,7 +598,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !94 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -614,7 +618,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_17:%.*]], ptr [[TMP3]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -628,7 +632,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !104 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: call void @_Z3fn5v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -648,7 +652,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_20:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 @@ -662,7 +666,7 @@ // CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !114 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -68,7 +68,8 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -172,32 +173,42 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..11) -// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP44]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP45]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP47:%.*]] = load ptr, ptr [[TMP46]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP47]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP44]]) +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP48]], align 8 +// CHECK1-NEXT: [[TMP49:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 40, i64 8, ptr @.omp_task_entry..11) +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP49]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP50]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP51]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP52]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK1-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP49]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: 
[[TMP49:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP49]]) +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP54]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP50]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP55]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP51]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP56]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -455,53 +466,51 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 56, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 56, i64 40, ptr @.omp_task_entry.) 
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]]) // CHECK1-NEXT: ret void // // @@ -557,7 +566,7 @@ // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 @@ -568,7 +577,7 @@ // CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP19:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP18]], ptr [[TMP17]], ptr [[TMP16]]) -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP11]], 2 // CHECK1-NEXT: [[TMP23:%.*]] = udiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) @@ -602,7 +611,7 @@ // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3:%.*]], ptr [[TMP3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 diff --git a/clang/test/OpenMP/task_member_call_codegen.cpp b/clang/test/OpenMP/task_member_call_codegen.cpp --- a/clang/test/OpenMP/task_member_call_codegen.cpp +++ b/clang/test/OpenMP/task_member_call_codegen.cpp @@ -31,10 +31,10 @@ // CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK1-NEXT: ret void // // @@ -72,23 +72,23 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr 
[[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void @_ZN1a1bEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP16]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void @_ZN1a1bEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP12]]) #[[ATTR4]] // CHECK1-NEXT: ret i32 0 // // @@ -99,11 +99,11 @@ // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK3-NEXT: [[TMP0:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM]], i32 1, i64 48, i64 1, ptr @.omp_task_entry.) -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP0]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], ptr [[TMP0]]) // CHECK3-NEXT: ret void // // @@ -141,22 +141,22 @@ // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK3-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK3-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK3-NEXT: call void @_ZN1a1bEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP16]]) #[[ATTR4]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: call void @_ZN1a1bEv(ptr noundef nonnull align 1 dereferenceable(1) [[TMP12]]) #[[ATTR4]] // CHECK3-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/task_target_device_codegen.c b/clang/test/OpenMP/task_target_device_codegen.c 
--- a/clang/test/OpenMP/task_target_device_codegen.c +++ b/clang/test/OpenMP/task_target_device_codegen.c @@ -6,7 +6,6 @@ // RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -fopenmp-version=50 -x c -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -27,12 +26,12 @@ // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 0, ptr @.omp_task_entry.) -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i32 0, i32 1 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP4]], i32 0, i32 0 -// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[T]], align 4 -// CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 8 -// CHECK-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[T]], align 4 +// CHECK-NEXT: store i32 [[TMP5]], ptr 
[[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]) // CHECK-NEXT: ret void // // @@ -77,24 +76,34 @@ // CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr 
[[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] -// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !12 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR__I]], align 4, !noalias !12 // CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_test_task_affinity_l18() #[[ATTR4]] // CHECK-NEXT: ret i32 0 // +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@test_task_affinity +// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[T:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, ptr [[T]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// SIMD-ONLY0-NEXT: ret void +// diff --git a/clang/test/OpenMP/taskgroup_codegen.cpp b/clang/test/OpenMP/taskgroup_codegen.cpp --- a/clang/test/OpenMP/taskgroup_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_codegen.cpp @@ -32,6 +32,20 @@ foo(); } #endif + + + + + + + + + + + + + + // CHECK1-LABEL: define {{[^@]+}}@_Z3foov // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { // CHECK1-NEXT: entry: @@ -75,30 +89,34 @@ // CHECK1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: invoke void @_Z3foov() // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr 
@[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]] +// CHECK1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP4]]) #[[ATTR8]] // CHECK1-NEXT: unreachable // // @@ -145,95 +163,101 @@ // DEBUG1-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv // DEBUG1-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG20:![0-9]+]] { // DEBUG1-NEXT: entry: -// DEBUG1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 0, ptr @.omp_outlined.), !dbg [[DBG21:![0-9]+]] +// DEBUG1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// DEBUG1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]), !dbg [[DBG21:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG22:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// DEBUG1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG23:![0-9]+]] { +// DEBUG1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR7:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG23:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// DEBUG1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // DEBUG1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// DEBUG1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG24:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG24]] -// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB5:[0-9]+]], i32 [[TMP1]]), !dbg [[DBG24]] +// DEBUG1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// DEBUG1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8, !dbg [[DBG24:![0-9]+]] +// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG25]] +// DEBUG1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB5:[0-9]+]], i32 [[TMP2]]), !dbg [[DBG25]] // DEBUG1-NEXT: invoke void @_Z3foov() -// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG25:![0-9]+]] +// DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG26:![0-9]+]] // DEBUG1: invoke.cont: -// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB5]], i32 [[TMP1]]), !dbg [[DBG25]] -// DEBUG1-NEXT: ret void, !dbg [[DBG26:![0-9]+]] +// DEBUG1-NEXT: call void @__kmpc_end_taskgroup(ptr 
@[[GLOB5]], i32 [[TMP2]]), !dbg [[DBG26]] +// DEBUG1-NEXT: ret void, !dbg [[DBG27:![0-9]+]] // DEBUG1: terminate.lpad: -// DEBUG1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: catch ptr null, !dbg [[DBG25]] -// DEBUG1-NEXT: [[TMP3:%.*]] = extractvalue { ptr, i32 } [[TMP2]], 0, !dbg [[DBG25]] -// DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP3]]) #[[ATTR8]], !dbg [[DBG25]] -// DEBUG1-NEXT: unreachable, !dbg [[DBG25]] +// DEBUG1-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } +// DEBUG1-NEXT: catch ptr null, !dbg [[DBG26]] +// DEBUG1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[DBG26]] +// DEBUG1-NEXT: call void @__clang_call_terminate(ptr [[TMP4]]) #[[ATTR8]], !dbg [[DBG26]] +// DEBUG1-NEXT: unreachable, !dbg [[DBG26]] // - -// CHECK2-LABEL: define dso_local void @_Z3foov() #{{.+}} { -// CHECK2: entry: +// +// CHECK2-LABEL: define {{[^@]+}}@_Z3foov +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: // CHECK2-NEXT: call void @_Z8mayThrowv() // CHECK2-NEXT: ret void - -// CHECK2-LABEL: define dso_local noundef i32 @main() #{{.+}} { -// CHECK2: entry: -// CHECK2-NEXT: %[[RETVAL:.+]] = alloca i32, align 4 -// CHECK2-NEXT: %[[A:.+]] = alloca i8, align 1 -// CHECK2-NEXT: store i32 0, ptr %[[RETVAL]], align 4 -// CHECK2-NEXT: %[[OMP_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM]]) -// CHECK2-NEXT: store i8 2, ptr %[[A]], align 1 -// CHECK2-NEXT: br label %taskgroup.exit - +// +// +// CHECK2-LABEL: define {{[^@]+}}@main +// CHECK2-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4 +// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM]]) +// CHECK2-NEXT: store i8 2, ptr [[A]], align 1 +// CHECK2-NEXT: br label [[TASKGROUP_EXIT:%.*]] // CHECK2: taskgroup.exit: -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM]]) -// CHECK2-NEXT: %[[OMP_THREAD_NUM2:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM2]]) +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) // CHECK2-NEXT: call void @_Z3foov() -// CHECK2-NEXT: br label %taskgroup.exit2 - +// CHECK2-NEXT: br label [[TASKGROUP_EXIT2:%.*]] // CHECK2: taskgroup.exit2: -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM2]]) -// CHECK2-NEXT: %[[TMP:.+]] = load i8, ptr %[[A]], align 1 -// CHECK2-NEXT: %[[CONV:.+]] = sext i8 %[[TMP]] to i32 -// CHECK2-NEXT: ret i32 %[[CONV]] - -// CHECK2-LABEL: define dso_local void @_Z18parallel_taskgroupv() #{{.+}} { -// CHECK2: entry: -// CHECK2-NEXT: %[[OMP_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK2-NEXT: br label %omp_parallel - +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +// CHECK2-NEXT: ret i32 [[CONV]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK2: omp_parallel: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @{{.+}}, i32 0, ptr @_Z18parallel_taskgroupv..omp_par) -// CHECK2-NEXT: br label %omp.par.outlined.exit - +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @_Z18parallel_taskgroupv..omp_par) +// CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK2: omp.par.outlined.exit: -// CHECK2-NEXT: br label %omp.par.exit.split - +// CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK2: omp.par.exit.split: // CHECK2-NEXT: ret void - -// CHECK2-LABEL: define internal void @_Z18parallel_taskgroupv..omp_par(ptr noalias %{{.+}}, ptr noalias %{{.+}}) #{{.+}} { -// CHECK2: omp.par.entry: -// CHECK2: br label %omp.par.region - +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z18parallel_taskgroupv..omp_par +// CHECK2-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-NEXT: omp.par.entry: +// CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK2-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 +// CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK2: omp.par.region: -// CHECK2-NEXT: %[[OMP_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(ptr @{{.+}}) -// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM]]) +// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) +// CHECK2-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) // CHECK2-NEXT: call void @_Z3foov() -// CHECK2-NEXT: br label %taskgroup.exit - +// CHECK2-NEXT: br label [[TASKGROUP_EXIT:%.*]] // CHECK2: taskgroup.exit: -// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @{{.+}}, i32 %[[OMP_THREAD_NUM]]) -// CHECK2-NEXT: br label %omp.par.region.parallel.after - +// CHECK2-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 
[[OMP_GLOBAL_THREAD_NUM1]]) +// CHECK2-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]] // CHECK2: omp.par.region.parallel.after: -// CHECK2-NEXT: br label %omp.par.pre_finalize - +// CHECK2-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK2: omp.par.pre_finalize: -// CHECK2-NEXT: br label %omp.par.outlined.exit.exitStub - +// CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK2: omp.par.outlined.exit.exitStub: // CHECK2-NEXT: ret void +// diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,65 
+444,63 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr 
@[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 8 +// 
CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]], i32 1, ptr [[TMP27]], ptr [[TMP28]], i64 [[TMP31]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -570,7 +579,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -581,7 +590,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ 
b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -55,6 +55,7 @@ // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_3:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 // CHECK1-NEXT: [[DOTTASK_RED_6:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 @@ -158,24 +159,34 @@ // CHECK1-NEXT: store i32 1, ptr [[TMP41]], align 8 // CHECK1-NEXT: [[TMP42:%.*]] = call ptr @__kmpc_taskred_init(i32 [[TMP0]], i32 2, ptr [[DOTRD_INPUT_3]]) // CHECK1-NEXT: store ptr [[TMP42]], ptr [[DOTTASK_RED_6]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[A]], i64 [[TMP2]], ptr [[VLA]], ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED_6]]) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP43]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP44]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP46]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_6]], ptr [[TMP47]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP43]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [5 x %struct.S], ptr [[C]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 5 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP44]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK1: arraydestroy.done8: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP45]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP50]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev @@ -433,65 
+444,63 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[VLA:%.*]], ptr nonnull align 2 dereferenceable(2) [[D:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_:%.*]], ptr nonnull align 8 dereferenceable(8) [[DOTTASK_RED_1:%.*]]) #[[ATTR8:[0-9]+]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR8:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTTASK_RED__ADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_1]], ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTTASK_RED__ADDR2]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 4 -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP11]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) 
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP12]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP13]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP15]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP16]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 5 -// CHECK1-NEXT: store i64 0, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 6 -// CHECK1-NEXT: store i64 4, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 7 -// CHECK1-NEXT: store i64 1, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP13]], i32 0, i32 9 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP24]], i8 0, i64 8, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[TMP23]], align 8 -// CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP11]], ptr [[TMP12]], i32 1, ptr [[TMP21]], ptr [[TMP22]], i64 [[TMP25]], i32 1, i32 0, i64 0, ptr null) -// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr 
@[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP17]], i32 1, i64 96, i64 40, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP18]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 40, i1 false) +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP18]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], ptr [[TMP22]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 5 +// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 6 +// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 7 +// CHECK1-NEXT: store i64 1, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP19]], i32 0, i32 9 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 8, i1 false) +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[TMP29]], align 8 +// 
CHECK1-NEXT: call void @__kmpc_taskloop(ptr @[[GLOB1]], i32 [[TMP17]], ptr [[TMP18]], i32 1, ptr [[TMP27]], ptr [[TMP28]], i64 [[TMP31]], i32 1, i32 0, i64 0, ptr null) +// CHECK1-NEXT: call void @__kmpc_end_taskgroup(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: ret void // // @@ -570,7 +579,7 @@ // CHECK1-NEXT: store ptr [[TMP18]], ptr [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !14 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP19]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP19]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 @@ -581,7 +590,7 @@ // CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP24]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP29:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP28]], ptr [[TMP27]], ptr [[TMP26]]) -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP19]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP19]], i32 0, i32 2 // CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = mul nuw i64 [[TMP21]], 2 // CHECK1-NEXT: [[TMP33:%.*]] = udiv exact i64 [[TMP32]], ptrtoint (ptr getelementptr (i16, ptr null, i32 1) to i64) diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -656,24 +656,29 @@ // CHECK1-SAME: (i64 noundef 
[[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -681,24 +686,29 @@ // CHECK1-SAME: (i64 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -707,28 +717,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], ptr [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 
[[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -737,28 +752,33 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[LA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[LA]], ptr [[LA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -770,6 +790,7 @@ // CHECK1-NEXT: [[GBLB_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[LC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLA]], ptr [[GBLA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 @@ -785,23 +806,27 @@ // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i64 [[TMP3]], [[CONV]] // CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP5]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[COMP_ADDR]]) +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -811,6 +836,7 @@ // CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK1-NEXT: store i64 [[GBLC]], ptr [[GBLC_ADDR]], align 8 // CHECK1-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -820,30 +846,35 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 // CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..13, ptr [[COMP_ADDR]], ptr [[TMP3]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[GBLC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[GBLC]], ptr [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GBLC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr 
[[TMP]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr @Gbla, align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -1208,24 +1239,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1233,24 +1269,29 @@ // CHECK3-SAME: (i32 noundef [[COMP:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 
[[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1259,28 +1300,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1289,28 +1335,33 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[LA_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[LA]], ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[LA_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 
[[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1323,6 +1374,7 @@ // CHECK3-NEXT: [[LC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GBLB1:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLA]], ptr [[GBLA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -1341,23 +1393,27 @@ // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i64 [[TMP5]], [[CONV]] // CHECK3-NEXT: [[TMP7:%.*]] = trunc i64 [[ADD2]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[TMP7]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[COMP_ADDR]]) +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1367,6 +1423,7 @@ // CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK3-NEXT: store i32 [[GBLC]], ptr [[GBLC_ADDR]], align 4 // CHECK3-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -1376,30 +1433,35 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 2 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[ADD]], i32 [[ADD1]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @.omp_outlined..13, ptr [[COMP_ADDR]], ptr [[TMP3]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GBLC:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[GBLC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[GBLC]], ptr [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[GBLC_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr @Gbla, align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr @Gbla, align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK3-NEXT: ret void // // @@ -1544,6 +1606,7 @@ // CHECK9-NEXT: [[GBLA_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[LA_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[GBLA]], ptr [[GBLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[LA]], ptr [[LA_ADDR]], align 8 @@ -1557,23 +1620,27 @@ // CHECK9-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i64 // CHECK9-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1583,6 +1650,7 @@ // CHECK9-NEXT: [[LB_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[GBLB_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[LB]], 
ptr [[LB_ADDR]], align 8 // CHECK9-NEXT: store ptr [[GBLB]], ptr [[GBLB_ADDR]], align 8 @@ -1597,23 +1665,27 @@ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK9-NEXT: ret void // // @@ -1758,6 +1830,7 @@ // CHECK11-NEXT: [[GBLA_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[LA_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[GBLA]], ptr [[GBLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[LA]], ptr [[LA_ADDR]], align 4 @@ -1771,23 +1844,27 @@ // CHECK11-NEXT: [[CONV:%.*]] = fptosi float [[TMP4]] to i64 // CHECK11-NEXT: [[TMP5:%.*]] = trunc i64 [[CONV]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP3]], i32 [[TMP5]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -1797,6 +1874,7 @@ // CHECK11-NEXT: [[LB_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[GBLB_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // 
CHECK11-NEXT: store ptr [[LB]], ptr [[LB_ADDR]], align 4 // CHECK11-NEXT: store ptr [[GBLB]], ptr [[GBLB_ADDR]], align 4 @@ -1811,23 +1889,27 @@ // CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[A]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 [[TMP6]]) -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr 
[[TMP0]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK11-NEXT: ret void // // @@ -1965,6 +2047,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -1972,23 +2055,27 @@ // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -1997,6 +2084,7 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK17-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[COMP]], ptr [[COMP_ADDR]], align 8 @@ -2006,23 +2094,27 @@ // CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP2]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK17-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK17-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK17-NEXT: ret void // // @@ -2160,6 +2252,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -2167,23 +2260,27 @@ // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 123) -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -2192,6 +2289,7 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK19-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[COMP]], ptr [[COMP_ADDR]], align 4 @@ -2201,23 +2299,27 @@ // CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP2]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 456, i32 [[ADD]]) -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[COMP_ADDR]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[COMP_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[COMP:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[COMP_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[COMP]], ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[COMP_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4 +// CHECK19-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK19-NEXT: store i32 [[INC]], ptr [[TMP2]], align 4 // CHECK19-NEXT: ret void // // @@ -2232,22 +2334,27 @@ // CHECK25-SAME: (i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK25-NEXT: ret void // // @@ -2255,22 +2362,27 @@ // CHECK25-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK25-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK25-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: store ptr null, ptr [[TMP2]], align 8 // CHECK25-NEXT: ret void // // @@ -2278,22 +2390,27 @@ // CHECK27-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK27-NEXT: entry: // 
CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2301,22 +2418,27 @@ // CHECK27-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK27-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK27-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK27-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: store ptr null, ptr [[TMP2]], align 4 // CHECK27-NEXT: ret void // // @@ -2326,6 +2448,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK33-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -2333,21 +2456,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK33-NEXT: ret void // // @@ -2357,6 +2484,7 @@ // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK33-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 @@ -2364,21 +2492,25 @@ // CHECK33-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], 
align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK33-NEXT: store ptr null, ptr [[TMP0]], align 8 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: store ptr null, ptr 
[[TMP2]], align 8 // CHECK33-NEXT: ret void // // @@ -2388,6 +2520,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK35-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -2395,21 +2528,25 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store i32 0, ptr [[TMP0]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK35-NEXT: ret void // // @@ -2419,6 +2556,7 @@ // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK35-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 @@ -2426,54 +2564,66 @@ // CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], 
align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[ARGC_ADDR]]) +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR0]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR0]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4 -// CHECK35-NEXT: store ptr null, ptr [[TMP0]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: store ptr null, ptr 
[[TMP2]], align 4 // CHECK35-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@_Z3foov // CHECK41-SAME: () #[[ATTR0:[0-9]+]] { // CHECK41-NEXT: entry: -// CHECK41-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK41-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK41-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK41-NEXT: ret void // // // CHECK41-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK41-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK41-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK41-NEXT: entry: // CHECK41-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK41-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK41-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK41-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK41-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK41-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK41-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK41-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@_Z3foov // CHECK43-SAME: () #[[ATTR0:[0-9]+]] { // CHECK43-NEXT: entry: -// CHECK43-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK43-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK43-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK43-NEXT: ret void // // // CHECK43-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK43-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK43-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK43-NEXT: entry: // CHECK43-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK43-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK43-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK43-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK43-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK43-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK43-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK43-NEXT: ret void // diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -339,6 +339,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -348,17 +349,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -371,74 +375,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -449,20 +455,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -475,74 +485,76 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -719,6 +731,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -728,17 +741,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -751,73 +767,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], 
[[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], 
align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -828,20 +846,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -854,73 +876,75 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1031,23 +1055,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1060,76 +1089,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1240,23 +1271,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1269,75 +1305,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1413,18 +1451,21 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1434,56 +1475,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp 
sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // @@ -1557,18 +1600,21 @@ // 
CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1578,55 +1624,57 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 
4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // @@ -1743,23 +1791,28 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1772,76 +1825,78 @@ // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 
// CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[I3]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK25-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK25-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -1926,6 +1981,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -1934,16 +1990,18 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1953,55 +2011,57 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: 
store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = 
phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // 
CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK25-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // @@ -2118,23 +2178,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2147,75 +2212,77 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 
// CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK27-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -2300,6 +2367,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], 
align 4 @@ -2308,16 +2376,18 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2327,54 +2397,56 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // 
CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP10]], 1 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp @@ -119,34 +119,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 
-1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -154,26 +154,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -185,68 +188,70 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -279,34 +284,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// 
CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -314,26 +319,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -345,66 +353,68 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 
[[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -433,10 +443,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = 
alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -461,91 +471,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: 
[[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = 
getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82 @@ -556,6 +566,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -564,153 +575,161 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, 
align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 
[[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 -// 
CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 
+// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -729,34 +748,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -768,18 +787,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // 
CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -791,67 +813,69 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK9-NEXT: store i32 [[ADD6]], ptr 
[[J]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -907,91 +931,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// 
CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 
2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr 
[[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // 
CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l82 @@ -1002,6 +1026,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1010,151 +1035,159 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], 
align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// 
CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] 
= mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], 
align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1173,34 +1206,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, 
ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l68(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1212,18 +1245,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) 
#[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1235,65 +1271,67 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store 
ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ 
[[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: 
[[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -272,18 +272,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -293,56 +296,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -350,18 +355,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -371,56 +379,58 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP4]]) // CHECK1-NEXT: ret void // // @@ -428,18 +438,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -449,73 +462,75 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // @@ -667,18 +682,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -688,55 +706,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -744,18 +764,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca 
ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -765,55 +788,57 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -821,18 +846,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -842,72 +870,74 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // @@ -1168,23 +1198,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1197,76 +1232,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1278,23 +1315,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1307,76 +1349,78 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = 
sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], 
align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], 
ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1389,7 +1433,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -1397,22 +1441,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1425,95 +1473,99 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1646,18 +1698,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1667,55 +1722,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1723,18 +1780,21 @@ // 
CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1744,55 +1804,57 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -1800,18 +1862,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1821,72 +1886,74 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) 
+// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // @@ -2149,23 +2216,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2178,75 +2250,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // 
CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2258,23 +2332,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2287,75 +2366,77 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 
4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2368,7 +2449,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2376,22 +2457,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 
+// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2404,94 +2489,98 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// 
CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 
[[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2624,18 +2713,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2645,54 +2737,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2700,18 +2794,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 
dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2721,54 +2818,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] 
= phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // @@ -2776,18 +2875,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2797,71 +2899,73 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// 
CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // 
CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -342,8 +342,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -352,138 +351,148 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// 
CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP27]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -692,7 +701,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -701,23 
+710,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -725,112 +738,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] 
= alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// 
CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1129,8 +1146,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1139,136 +1155,146 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 
4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1477,7 +1503,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1486,23 +1512,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 
4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1510,110 +1540,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: 
[[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void 
@_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x 
%struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -1842,35 +1876,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 
-// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1882,65 +1914,73 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr 
[[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr 
[[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -163,25 +163,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -189,97 +195,99 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr 
[[SVAR5]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP31]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label 
[[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -321,6 +329,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -334,20 +343,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -355,97 +369,99 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr 
[[SFVAR6]], ptr [[TMP18]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP22]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP23]], align 4 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP26]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP27]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[TMP28]], align 4 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP31]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -605,6 +621,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -614,21 +631,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -636,31 +659,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = 
alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -670,99 +695,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// 
CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr 
inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP22]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// 
CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -940,6 +965,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -948,20 +974,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -969,28 +1000,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1000,97 +1033,97 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 
-// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP23]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// 
CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP24]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1292,6 +1325,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1301,21 +1335,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1323,31 +1363,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1357,97 +1399,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP22]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr 
[[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP25]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP31]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP33]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1625,6 +1667,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1633,20 +1676,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1654,28 +1702,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1685,95 +1735,95 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP15]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP20]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP24]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP29]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -338,6 +338,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -347,17 +348,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -367,87 +371,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
-// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -460,73 +476,77 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc 
i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: // CHECK1-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK1: .cancel.continue: @@ -534,21 +554,21 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: cancel.exit: -// CHECK1-NEXT: 
[[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK1-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK1: omp.precond.end: // CHECK1-NEXT: br label [[CANCEL_CONT]] @@ -561,20 +581,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -584,87 +608,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -677,82 +713,86 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc 
i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void @@ -928,6 +968,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -937,17 +978,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -957,85 +1001,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br 
i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label 
[[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1048,70 +1104,74 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] 
// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP20]], i32 2) -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 
[[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = call i32 @__kmpc_cancel(ptr @[[GLOB3]], i32 [[TMP27]], i32 2) +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: // CHECK3-NEXT: br label [[CANCEL_EXIT:%.*]] // CHECK3: .cancel.continue: @@ -1119,21 +1179,21 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: cancel.exit: -// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP33:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK3-NEXT: br label [[CANCEL_CONT:%.*]] // CHECK3: omp.precond.end: // CHECK3-NEXT: br label [[CANCEL_CONT]] @@ -1146,20 +1206,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1169,85 +1233,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1260,79 +1336,83 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 
+// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void @@ -1443,23 +1523,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1469,90 +1554,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1565,84 +1663,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, 
ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1753,23 +1855,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], 
align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1779,88 +1886,101 @@ // 
CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1873,81 +1993,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] 
+// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -2023,83 +2147,97 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], 
align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ 
[[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2109,64 +2247,68 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] 
= add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -2240,81 +2382,95 @@ // CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 
1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // 
CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2324,61 +2480,65 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -2495,23 +2655,28 @@ // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2521,90 +2686,103 @@ // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: 
store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK25-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] 
// CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK25-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK25-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK25-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2617,84 +2795,88 @@ // CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK25-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK25-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// 
CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK25-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK25-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK25-NEXT: br label [[OMP_PRECOND_END]] // CHECK25: omp.precond.end: // CHECK25-NEXT: ret void @@ -2778,6 +2960,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // 
CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -2786,81 +2969,94 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK25-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2870,63 +3066,67 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], ptr 
[[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: 
br label [[OMP_INNER_FOR_COND]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK25-NEXT: ret void // // @@ -3043,23 +3243,28 @@ // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3069,88 +3274,101 @@ // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: 
store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] 
// CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3163,81 +3381,85 @@ // CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK27-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] 
+// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK27-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK27-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK27-NEXT: br label [[OMP_PRECOND_END]] // CHECK27: omp.precond.end: // CHECK27-NEXT: ret void @@ -3321,6 +3543,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -3329,79 +3552,92 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // 
CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3411,60 +3647,64 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK27-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp @@ -124,34 +124,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// 
CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -159,26 +159,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -186,66 +189,77 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ 
-257,76 +271,80 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -359,34 +377,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr 
@.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// 
CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -394,26 +412,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -421,64 +442,75 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], 
align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: 
cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ 
-490,72 +522,76 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 
0, i32 [[TMP14]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -584,10 +620,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -612,91 +648,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: 
[[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 
[[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 
[[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 @@ -707,6 +743,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], 
align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -715,273 +752,298 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: 
[[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], 
ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP35]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef 
[[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca 
i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: 
store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] 
+// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// 
CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4 +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] 
+// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1000,34 +1062,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// 
CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr 
[[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store 
ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1039,18 +1101,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1058,66 +1123,77 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] 
= alloca i32, align 4 @@ -1129,75 +1205,79 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext 
i32 [[TMP14]] to i64 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -1253,91 +1333,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// 
CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr 
[[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store 
i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: 
[[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: 
[[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 @@ -1348,6 +1428,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1356,275 +1437,300 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// 
CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// 
CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr 
[[TMP2]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] // CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], 
align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, 
align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// 
CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 
[[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr 
[[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = mul 
nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], 
[[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4 +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -1643,34 +1749,34 @@ 
// CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1682,18 +1788,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1701,64 +1810,75 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1770,71 +1890,75 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 
0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] // CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 
[[TMP14]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -109,40 +109,40 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] 
= getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: 
omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP2]], ptr [[A]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -156,88 +156,104 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] 
// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -247,66 +263,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to 
i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr 
[[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -325,40 +345,40 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[X_CASTED]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP15]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 -// 
CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.4, ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, ptr [[TMP12]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP10]], ptr 
[[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.4, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i64 [[TMP2]], ptr [[A]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -371,88 +391,104 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -462,66 +498,70 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to 
i32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], 
align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK1-NEXT: ret void // // @@ -555,40 +595,40 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], 
i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i32 [[TMP2]], ptr [[A]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -602,86 +642,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP9]], i32 [[TMP10]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -691,63 +747,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -766,40 +826,40 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, 
i32 0 +// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.4, ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 2, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.4, ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP19]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i32 [[TMP2]], ptr [[A]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -812,86 +872,102 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..3, i32 [[TMP9]], i32 [[TMP10]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -901,63 +977,67 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK3-NEXT: ret void // // @@ -992,88 +1072,104 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], ptr [[X_ADDR]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[X_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], 
i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] 
// CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1081,76 +1177,80 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// 
CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP3]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP5]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[I]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[I]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP23]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -284,83 +284,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-370,64 +384,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -435,83 +453,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-521,64 +553,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -586,103 +622,117 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] 
// CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -692,64 +742,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: 
[[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -901,81 +955,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-985,61 +1053,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1047,81 +1119,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1131,61 +1217,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1193,101 +1283,115 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] 
// CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1297,61 +1401,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// 
CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store 
i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1624,23 +1732,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1650,90 +1763,103 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 
-// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], 
[[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1746,84 +1872,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, 
ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -1835,23 +1965,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 
8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1861,90 +1996,103 @@ // CHECK9-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 
[[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br 
i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // 
CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1957,84 +2105,88 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// 
CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2048,7 +2200,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2057,22 
+2209,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2082,121 +2238,135 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 
91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: 
[[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2209,85 +2379,91 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = 
load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = 
trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] 
+// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void @@ -2432,83 +2608,97 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2518,63 +2708,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2582,83 +2776,97 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, 
align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2668,63 +2876,67 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2734,118 +2946,132 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // 
CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: 
[[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2855,64 +3081,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr 
[[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -3187,23 +3419,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3213,88 +3450,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3307,81 +3557,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] 
+// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3393,23 +3647,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3419,88 +3678,101 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 
// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3513,81 +3785,85 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3601,7 +3877,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -3610,22 +3886,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3635,119 +3915,133 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: 
[[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3760,82 +4054,88 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], 
ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void @@ -3980,81 +4280,95 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4064,60 +4378,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4125,81 +4443,95 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4209,60 +4541,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -4272,116 +4608,130 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], 
label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4391,61 +4741,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr 
[[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -376,8 +376,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -386,133 +385,156 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: 
omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: store i32 
[[TMP31]], ptr [[TMP30]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -550,137 +572,141 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr 
[[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// 
CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] 
+// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -688,10 +714,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -702,9 +728,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -768,12 +794,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store 
i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -856,7 +882,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -865,23 +891,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -889,107 +919,125 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1007,16 +1055,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1024,120 +1068,126 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] 
= phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], 
[[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8 -// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1157,7 +1207,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1171,7 +1221,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1187,9 +1237,9 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr 
inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1423,8 +1473,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1433,131 +1482,154 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: 
omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[VAR]], ptr [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1595,133 +1667,137 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 
-// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label 
[[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1729,10 +1805,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = 
alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1743,9 +1819,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1809,12 +1885,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr 
[[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1897,7 +1973,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1906,23 +1982,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1930,105 +2010,123 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP21]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2046,16 +2144,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2063,116 +2157,122 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr 
[[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], 
i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// 
CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2192,7 +2292,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2206,7 +2306,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2222,9 +2322,9 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -2388,35 +2488,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // 
CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2424,82 +2522,94 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = 
alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// 
CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2508,76 +2618,86 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP21]], align 4 +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -176,77 +176,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l48 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -256,139 +268,155 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] 
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l52 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 
// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -398,43 +426,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z9gtid_testv() @@ -442,14 +474,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -523,124 +555,136 @@ // CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP26]], 
align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK1-NEXT: [[TMP33:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP30]] to i1 +// CHECK1-NEXT: [[TMP31:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1 // CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP34]], align 4 -// CHECK1-NEXT: 
[[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.region_id, ptr [[KERNEL_ARGS7]]) -// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 -// CHECK1-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP32]], 
align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP31]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97.region_id, ptr [[KERNEL_ARGS7]]) +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] // CHECK1: omp_offload.failed8: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l97(i64 [[TMP23]]) #[[ATTR2]] // 
CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT9]] // CHECK1: omp_offload.cont9: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr @Arg, align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP45]]) +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr @Arg, align 4 +// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiEiT_(i32 noundef [[TMP43]]) // CHECK1-NEXT: ret i32 [[CALL]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l81 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define 
{{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -650,43 +694,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
[[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 
99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn4v() @@ -694,96 +742,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l89 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: 
[[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -793,43 +853,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn5v() @@ -837,14 +901,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -853,104 +917,119 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -960,43 +1039,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn6v() @@ -1004,14 +1087,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1085,41 +1168,41 @@ // CHECK1-NEXT: [[TMP23:%.*]] = load i64, ptr [[ARG_CASTED]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 -// 
CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP32]] to i1 -// CHECK1-NEXT: [[TMP33:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP30]] to i1 +// CHECK1-NEXT: [[TMP31:%.*]] = select i1 [[TOBOOL5]], i32 0, i32 1 // CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP34]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP37]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8 -// CHECK1-NEXT: store i64 100, ptr [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.region_id, ptr [[KERNEL_ARGS7]]) -// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 -// CHECK1-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP33]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP28]], ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.16, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.17, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-NEXT: store i64 100, ptr [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 [[TMP31]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72.region_id, ptr [[KERNEL_ARGS7]]) +// CHECK1-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK1-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] // CHECK1: omp_offload.failed8: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l72(i64 [[TMP23]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT9]] @@ -1130,77 +1213,89 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 
4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: 
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1210,43 +1305,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // 
CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn1v() @@ -1254,96 +1353,108 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l67 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 
8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1353,43 +1464,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn2v() @@ -1397,14 +1512,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -1413,104 +1528,119 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, 
ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 -// CHECK1-NEXT: [[FROMBOOL3:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1520,43 +1650,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: call void @_Z3fn3v() @@ -1564,14 +1698,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -201,25 +201,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], 
align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -227,98 +233,112 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 
4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP13]], i64 [[TMP15]], ptr [[G2]], ptr [[TMP16]], ptr [[SVAR5]], ptr [[SFVAR6]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP21]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load 
float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP25]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP32]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[TMP33]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP35]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP36]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -326,105 +346,109 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr 
[[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8 -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP18]], ptr 
[[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8 +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 
+// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: 
[[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -466,6 +490,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -479,20 +504,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -500,96 +530,110 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = 
alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: 
store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 
4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP12]], i32 [[TMP13]], ptr [[G2]], ptr [[TMP14]], ptr [[SVAR5]], ptr [[SFVAR6]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP19]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[TMP20]], align 4 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load 
float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP23]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP30]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP33]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP34]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -597,103 +641,107 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// 
CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] 
= load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4 -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], 
i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 
// CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP33]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP36]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP37]], ptr [[TMP12]], align 4 // 
CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -853,6 +901,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -862,21 +911,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// 
CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -884,31 +939,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca 
i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], 
align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -918,105 +978,115 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 
1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: 
[[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP17]], ptr [[SVAR7]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8 +// 
CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 
[[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr 
[[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// 
CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1024,39 +1094,43 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store 
ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr 
[[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1066,99 +1140,99 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, 
ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr 
inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1176,10 +1250,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = 
alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1190,9 +1264,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1256,12 +1330,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], 
ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1336,6 +1410,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1344,20 +1419,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1365,133 +1445,147 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: 
[[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP16]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 
[[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = 
getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP25]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP36]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // 
CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = 
alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1499,136 +1593,140 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: 
store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: 
[[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP24:%.*]] = load 
i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], 
[[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// 
CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1648,7 +1746,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1661,7 +1759,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1830,6 +1928,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1839,21 +1938,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1861,31 +1966,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1895,103 +2005,113 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP15]], ptr [[SVAR7]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: 
store ptr [[T_VAR]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP24]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr 
[[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: 
define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1999,37 +2119,41 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, 
align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load 
ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr 
inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2039,97 +2163,97 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], 
align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// 
CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 
[[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP37]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP38]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2147,10 +2271,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2161,9 +2285,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2227,12 +2351,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2307,6 +2431,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr 
[[S_ARR_ADDR]], align 4 @@ -2315,20 +2440,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2336,131 +2466,145 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP14]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr 
[[VEC]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// 
CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], 
[[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP22]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP23]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP33]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 
4 [[TMP34]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2468,132 +2612,136 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// 
CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br 
label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false) // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x 
%struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP34]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP35]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2613,7 +2761,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2626,7 +2774,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ 
-220,78 +220,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -301,43 
+313,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -347,20 +363,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK1-NEXT: unreachable // // @@ -376,89 +392,102 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -468,43 
+497,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -514,20 +547,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -703,78 +736,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 
// CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: 
store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -784,43 
+829,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -830,98 +879,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () 
#[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -931,43 
+992,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -977,98 +1042,110 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () 
#[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1078,43 
+1155,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1124,20 +1205,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1146,17 +1227,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // 
CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -1167,77 +1248,90 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1247,43 +1341,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: invoke void @_Z3foov() @@ -1293,20 +1391,20 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK1-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK1-NEXT: unreachable // // @@ -1464,78 +1562,90 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // 
CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store 
i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], 
ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1545,43 
+1655,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 
34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1591,20 +1705,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9:[0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9:[0-9]+]] // CHECK5-NEXT: unreachable // // @@ -1620,89 +1734,102 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 
8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: 
[[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1712,43 
+1839,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -1758,20 +1889,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -1938,78 +2069,90 @@ // CHECK5-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // 
CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2019,43 
+2162,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2065,98 +2212,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () 
#[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2166,43 
+2325,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2212,98 +2375,110 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () 
#[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2313,43 
+2488,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2359,20 +2538,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // @@ -2381,17 +2560,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // 
CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -2402,77 +2581,90 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2482,43 +2674,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], 
i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
+// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK5-NEXT: invoke void @_Z3foov() @@ -2528,20 +2724,20 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP11:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP16:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP12:%.*]] = extractvalue { ptr, i32 } [[TMP11]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP12]]) #[[ATTR9]] +// CHECK5-NEXT: [[TMP17:%.*]] = extractvalue { ptr, i32 } [[TMP16]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP17]]) #[[ATTR9]] // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -294,29 +294,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = 
alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -332,55 +339,61 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -390,12 +403,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -410,14 +422,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -433,68 +449,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -507,19 +523,19 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -550,12 +566,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -591,15 +607,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -607,81 +625,92 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = 
alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label 
[[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 
[[TMP14]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -690,12 +719,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// 
CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -705,97 +733,101 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr 
[[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x 
%struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false) +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = 
getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -819,7 +851,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -833,7 +865,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 
-// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1018,29 +1050,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l96 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] 
= alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1056,53 +1095,59 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = 
load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1112,12 +1157,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1132,14 +1176,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1153,66 
+1201,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr 
inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr 
[[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1225,19 +1273,19 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr 
[[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1268,12 +1316,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ 
-1309,15 +1357,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1325,79 +1375,90 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: 
[[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr 
noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1406,12 +1467,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1421,93 +1481,97 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // 
CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label 
[[ARRAYDESTROY_BODY]] @@ -1531,7 +1595,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1545,7 +1609,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1706,17 +1770,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1724,68 +1790,78 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr 
[[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1798,74 +1874,78 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: 
[[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: 
store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], 
i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -116,78 +116,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -197,135 +209,151 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], 
[[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -335,57 +363,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // @@ -425,78 +457,90 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 
[[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -506,57 +550,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // 
CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) // CHECK1-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp @@ -108,34 +108,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store 
i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -148,190 +148,208 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], 
ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 
// CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr 
[[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -345,15 +363,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], 
i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -365,15 +383,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// 
CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -389,41 +407,41 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 
0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// 
CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], 
align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -433,190 +451,208 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: 
i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label 
[[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw 
i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -630,15 +666,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -650,15 +686,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -684,34 +720,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // 
CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -724,186 +760,204 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: 
[[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr 
@.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 
1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: 
[[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -917,15 +971,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr 
[[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -937,15 +991,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -961,41 +1015,41 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// 
CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -1005,186 +1059,204 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]) +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: 
i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 
[[TMP21]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP24]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1198,15 +1270,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1218,15 +1290,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 
4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1251,194 +1323,212 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]) +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] 
// CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce_nowait(ptr 
@[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP25]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -1452,15 +1542,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // @@ -1472,15 +1562,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// 
CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -57,258 +57,277 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[ARGC_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[_TMP12:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP11:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP26:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 9 -// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = sub i64 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: [[TMP10:%.*]] = sdiv exact i64 [[TMP9]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP11:%.*]] = add nuw i64 [[TMP10]], 1 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP13]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP11]], align 16 -// CHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP14]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP7]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP8]], i64 9 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = sub i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = sdiv exact i64 [[TMP12]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP14:%.*]] = add nuw i64 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP14]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP16:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP16]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP14]], align 16 +// CHECK1-NEXT: store i64 [[TMP14]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] 
// CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = sub i64 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: [[TMP20:%.*]] = sdiv exact i64 [[TMP19]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] -// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP21]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[TMP19]] to i64 +// CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP23]] +// CHECK1-NEXT: store ptr [[_TMP4]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP24]], ptr [[_TMP4]], align 8 // CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP26]], 
align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP30]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 -// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP27]], align 8 +// 
CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init., ptr [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb., ptr [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP31]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_5:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP33:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds ptr, ptr [[TMP33]], i64 0 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[ARRAYIDX6]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i64 0 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP36]] -// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 -// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: [[LB_ADD_LEN8:%.*]] = add nsw i64 -1, [[TMP36]] +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP3]], align 8 
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds ptr, ptr [[TMP37]], i64 9 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[ARRAYIDX9]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 [[LB_ADD_LEN8]] // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP39]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 -// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[ARRAYIDX10]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[ARRAYIDX7]] to i64 // CHECK1-NEXT: [[TMP42:%.*]] = sub i64 [[TMP40]], [[TMP41]] // CHECK1-NEXT: [[TMP43:%.*]] = sdiv exact i64 [[TMP42]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK1-NEXT: [[TMP44:%.*]] = add nuw i64 [[TMP43]], 1 // CHECK1-NEXT: [[TMP45:%.*]] = mul nuw i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 2 // CHECK1-NEXT: store i64 [[TMP45]], ptr [[TMP46]], align 8 -// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 3 // CHECK1-NEXT: store ptr @.red_init..1, ptr [[TMP47]], align 8 -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 4 // CHECK1-NEXT: store ptr null, ptr [[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 5 // CHECK1-NEXT: store ptr @.red_comb..2, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T]], ptr [[DOTRD_INPUT_GEP_5]], i32 0, i32 6 // CHECK1-NEXT: store i32 1, ptr [[TMP50]], align 8 // CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP54]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP52]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP53]], ptr [[DOTTASK_RED_]], align 8 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP56]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP57]], 9 +// CHECK1-NEXT: [[TMP54:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, ptr [[TMP54]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP55]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP56]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP58]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP57]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP59]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP58]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP60]], [[TMP61]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: 
[[TMP59:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP59]], [[TMP60]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, i64 [[TMP62]], i64 [[TMP63]], ptr [[ARGC1]], ptr [[TMP64]]) +// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 [[TMP62]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP63]], align 8 +// CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP67]], ptr [[TMP66]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: [[TMP68:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP68]], [[TMP69]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP67:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP68]]) -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP70]], i32 1) -// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP71]], align 8 -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP73]], align 8 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP75:%.*]] = inttoptr i64 [[TMP11]] to ptr -// CHECK1-NEXT: store ptr [[TMP75]], ptr [[TMP74]], align 8 -// CHECK1-NEXT: [[TMP76:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP77:%.*]] = load i32, ptr 
[[TMP76]], align 4 -// CHECK1-NEXT: [[TMP79:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP77]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP79]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP70:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP71]]) +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP73]], i32 1) +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP74]], align 8 +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP77:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK1-NEXT: store ptr [[TMP77]], ptr [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP78:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = load i32, ptr [[TMP78]], align 4 +// CHECK1-NEXT: [[TMP80:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4:[0-9]+]], i32 [[TMP79]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.9, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP80]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: 
.omp.reduction.case1: -// CHECK1-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP80]], [[TMP81]] -// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP81]], [[TMP82]] +// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE20:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP83:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP83]] to i32 -// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP84]] to i32 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] -// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 -// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 -// CHECK1-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST14:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT18:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP84:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP84]] to i32 +// CHECK1-NEXT: [[TMP85:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV15:%.*]] = sext i8 [[TMP85]] to i32 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV]], [[CONV15]] +// CHECK1-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 +// CHECK1-NEXT: store i8 [[CONV17]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT18]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST14]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP82]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done21: -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP77]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE19:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT18]], [[TMP83]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE19]], label [[OMP_ARRAYCPY_DONE20]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done20: +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP79]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP86:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP85]] monotonic, align 4 -// CHECK1-NEXT: 
[[TMP87:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP11]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] -// CHECK1: omp.arraycpy.body23: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP88:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP88]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP87:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP86]] monotonic, align 4 +// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY21:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP88]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY21]], label [[OMP_ARRAYCPY_DONE34:%.*]], label [[OMP_ARRAYCPY_BODY22:%.*]] +// CHECK1: omp.arraycpy.body22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST23:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT32:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST24:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT31:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP89:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV25:%.*]] = sext i8 [[TMP89]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]] monotonic, align 1 // CHECK1-NEXT: 
br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP89:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP94:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP89]], ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[TMP90:%.*]] = load i8, ptr [[_TMP27]], align 1 -// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP90]] to i32 -// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 -// CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP91]] to i32 -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] -// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 -// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP93:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP89]], i8 [[TMP92]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP94]] = extractvalue { i8, i1 } [[TMP93]], 0 -// CHECK1-NEXT: [[TMP95:%.*]] = extractvalue { i8, i1 } [[TMP93]], 1 -// CHECK1-NEXT: br i1 [[TMP95]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[TMP90:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY22]] ], [ [[TMP95:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP90]], ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[TMP91:%.*]] = load i8, ptr [[_TMP26]], align 1 +// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP91]] to i32 +// CHECK1-NEXT: [[TMP92:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP92]] to i32 +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[CONV27]], [[CONV28]] +// CHECK1-NEXT: [[CONV30:%.*]] = trunc i32 [[ADD29]] to i8 +// CHECK1-NEXT: store i8 [[CONV30]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP93:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP94:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i8 [[TMP90]], i8 [[TMP93]] monotonic monotonic, align 1 +// 
CHECK1-NEXT: [[TMP95]] = extractvalue { i8, i1 } [[TMP94]], 0 +// CHECK1-NEXT: [[TMP96:%.*]] = extractvalue { i8, i1 } [[TMP94]], 1 +// CHECK1-NEXT: br i1 [[TMP96]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP87]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] -// CHECK1: omp.arraycpy.done35: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT31]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST23]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE33:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT31]], [[TMP88]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE33]], label [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_BODY22]] +// CHECK1: omp.arraycpy.done34: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP96:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP96]]) +// CHECK1-NEXT: [[TMP97:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP97]]) // CHECK1-NEXT: ret void // // @@ -319,8 +338,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 
4 // CHECK1-NEXT: ret void // // @@ -331,12 +350,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -396,283 +415,284 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca 
ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ARGC1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[ARGC:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.0], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_:%.*]] = alloca [2 x %struct.kmp_taskred_input_t.1], align 8 // CHECK1-NEXT: [[DOTTASK_RED_:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK1-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [3 x ptr], align 8 // CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[_TMP28:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[_TMP27:%.*]] = alloca i8, align 1 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add 
nsw i64 -1, [[TMP6]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP7]], i64 9 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 [[LB_ADD_LEN]] -// CHECK1-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[ARRAYIDX4]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: [[TMP12:%.*]] = sdiv exact i64 [[TMP11]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP13:%.*]] = add nuw i64 [[TMP12]], 1 -// CHECK1-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() -// CHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP13]], align 16 -// CHECK1-NEXT: store i64 [[TMP13]], ptr [[__VLA_EXPR0]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP16]] +// CHECK1-NEXT: store i32 0, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN:%.*]] = add nsw i64 -1, [[TMP13]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[TMP14]], i64 9 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARRAYIDX2]], 
align 8 +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 [[LB_ADD_LEN]] +// CHECK1-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX3]] to i64 +// CHECK1-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = sub i64 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP19:%.*]] = sdiv exact i64 [[TMP18]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP20:%.*]] = add nuw i64 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP21:%.*]] = mul nuw i64 [[TMP20]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @llvm.stacksave() +// CHECK1-NEXT: store ptr [[TMP22]], ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca i8, i64 [[TMP20]], align 16 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[VLA]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP16]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = 
ptrtoint ptr [[TMP18]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint ptr [[ARRAYIDX2]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = sub i64 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: [[TMP22:%.*]] = sdiv exact i64 [[TMP21]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP22]] -// CHECK1-NEXT: store ptr [[_TMP6]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP23]], ptr [[_TMP6]], align 8 -// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 -// CHECK1-NEXT: store i64 4, ptr [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 -// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP32]], i8 0, i64 4, i1 false) -// CHECK1-NEXT: 
[[DOTRD_INPUT_GEP_7:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.0], ptr [[DOTRD_INPUT_]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds ptr, ptr [[TMP35]], i64 0 -// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[ARRAYIDX8]], align 8 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP36]], i64 0 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = sext i32 [[TMP37]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN10:%.*]] = add nsw i64 -1, [[TMP38]] -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 9 -// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX11]], align 8 -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 [[LB_ADD_LEN10]] -// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 8 -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = ptrtoint ptr [[ARRAYIDX12]] to i64 -// CHECK1-NEXT: [[TMP43:%.*]] = ptrtoint ptr [[ARRAYIDX9]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = sub i64 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = sdiv exact i64 [[TMP44]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP46:%.*]] = add nuw i64 [[TMP45]], 1 -// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 2 -// CHECK1-NEXT: store i64 [[TMP47]], ptr 
[[TMP48]], align 8 -// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 3 -// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP49]], align 8 -// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 4 -// CHECK1-NEXT: store ptr null, ptr [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_0]], ptr [[DOTRD_INPUT_GEP_7]], i32 0, i32 6 -// CHECK1-NEXT: store i32 1, ptr [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, ptr [[TMP53]], align 4 -// CHECK1-NEXT: [[TMP56:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP54]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) -// CHECK1-NEXT: store ptr [[TMP56]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = ptrtoint ptr [[TMP25]] to i64 +// CHECK1-NEXT: [[TMP27:%.*]] = ptrtoint ptr [[ARRAYIDX1]] to i64 +// CHECK1-NEXT: [[TMP28:%.*]] = sub i64 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: [[TMP29:%.*]] = sdiv exact i64 [[TMP28]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[VLA]], i64 [[TMP29]] +// CHECK1-NEXT: store ptr [[_TMP5]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP30]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASKRED_INPUT_T_1:%.*]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP31]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 4, ptr [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..4, ptr [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..5, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_]], i32 0, i32 6 +// CHECK1-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[TMP37]], i8 0, i64 4, i1 false) +// CHECK1-NEXT: [[DOTRD_INPUT_GEP_6:%.*]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t.1], ptr [[DOTRD_INPUT_]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[TMP39]], i64 0 +// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i8, ptr [[TMP40]], i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = sext i32 [[TMP41]] to 
i64 +// CHECK1-NEXT: [[LB_ADD_LEN9:%.*]] = add nsw i64 -1, [[TMP42]] +// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds ptr, ptr [[TMP43]], i64 9 +// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[ARRAYIDX10]], align 8 +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, ptr [[TMP44]], i64 [[LB_ADD_LEN9]] +// CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[TMP45]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = ptrtoint ptr [[ARRAYIDX11]] to i64 +// CHECK1-NEXT: [[TMP47:%.*]] = ptrtoint ptr [[ARRAYIDX8]] to i64 +// CHECK1-NEXT: [[TMP48:%.*]] = sub i64 [[TMP46]], [[TMP47]] +// CHECK1-NEXT: [[TMP49:%.*]] = sdiv exact i64 [[TMP48]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP50:%.*]] = add nuw i64 [[TMP49]], 1 +// CHECK1-NEXT: [[TMP51:%.*]] = mul nuw i64 [[TMP50]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP51]], ptr [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 3 +// CHECK1-NEXT: store ptr @.red_init..6, ptr [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 4 +// CHECK1-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr [[DOTRD_INPUT_GEP_6]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.red_comb..7, ptr [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASKRED_INPUT_T_1]], ptr 
[[DOTRD_INPUT_GEP_6]], i32 0, i32 6 +// CHECK1-NEXT: store i32 1, ptr [[TMP56]], align 8 // CHECK1-NEXT: [[TMP57:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP58:%.*]] = load i32, ptr [[TMP57]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP58]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP59]], 9 +// CHECK1-NEXT: [[TMP59:%.*]] = call ptr @__kmpc_taskred_modifier_init(ptr @[[GLOB1]], i32 [[TMP58]], i32 1, i32 2, ptr [[DOTRD_INPUT_]]) +// CHECK1-NEXT: store ptr [[TMP59]], ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, ptr [[TMP60]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3:[0-9]+]], i32 [[TMP61]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP62]], 9 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP60]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ 9, [[COND_TRUE]] ], [ [[TMP63]], [[COND_FALSE]] ] // CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP61]], ptr [[DOTOMP_IV]], align 8 +// 
CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP64]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sle i64 [[TMP62]], [[TMP63]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP65]], [[TMP66]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP64]], 1 +// CHECK1-NEXT: [[TMP67:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP67]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8 -// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP65]], align 8 -// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP66]], align 8 -// CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP68:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: store ptr [[TMP68]], ptr [[TMP67]], align 8 -// CHECK1-NEXT: [[TMP69:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4 -// CHECK1-NEXT: [[TMP71:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP70]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) -// CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP71]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP73]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP75:%.*]] = load ptr, ptr [[TMP74]], align 8 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP75]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) -// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP71]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP77]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP79:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 -// CHECK1-NEXT: store ptr [[TMP79]], ptr [[TMP78]], align 8 -// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -// CHECK1-NEXT: [[TMP82:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP81]], ptr [[TMP71]]) +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTTASK_RED_]], ptr [[TMP68]], align 8 +// CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP71:%.*]] = load ptr, ptr [[_TMP4]], align 8 +// CHECK1-NEXT: store ptr [[TMP71]], ptr [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP73:%.*]] = load i32, ptr [[TMP72]], align 4 +// CHECK1-NEXT: 
[[TMP74:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP73]], i32 1, i64 48, i64 24, ptr @.omp_task_entry.) +// CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP74]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP75]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP77:%.*]] = load ptr, ptr [[TMP76]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP77]], ptr align 8 [[AGG_CAPTURED]], i64 24, i1 false) +// CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP74]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP78]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP80:%.*]] = load ptr, ptr [[DOTTASK_RED_]], align 8 +// CHECK1-NEXT: store ptr [[TMP80]], ptr [[TMP79]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP81]], align 4 +// CHECK1-NEXT: [[TMP83:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP82]], ptr [[TMP74]]) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP83:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP83]], 1 -// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[TMP84]], 1 +// CHECK1-NEXT: store i64 [[ADD13]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP84:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP85:%.*]] = load i32, ptr 
[[TMP84]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP85]]) -// CHECK1-NEXT: [[TMP86:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP87:%.*]] = load i32, ptr [[TMP86]], align 4 -// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP87]], i32 1) -// CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[ARGC1]], ptr [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP85:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP86]]) +// CHECK1-NEXT: [[TMP87:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP87]], align 4 +// CHECK1-NEXT: call void @__kmpc_task_reduction_modifier_fini(ptr @[[GLOB1]], i32 [[TMP88]], i32 1) +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[ARGC]], ptr [[TMP89]], align 8 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[VLA]], ptr [[TMP90]], align 8 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP13]] to ptr +// CHECK1-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP20]] to ptr // CHECK1-NEXT: store ptr [[TMP92]], ptr [[TMP91]], align 8 // CHECK1-NEXT: [[TMP93:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP94:%.*]] = load i32, ptr [[TMP93]], align 4 -// CHECK1-NEXT: [[TMP96:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr 
@.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP96]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP95:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], i32 2, i64 24, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP95]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP98:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP97]], [[TMP98]] -// CHECK1-NEXT: store i32 [[ADD15]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE22:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP96]], [[TMP97]] +// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE21:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST16:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE1]] ], [ 
[[OMP_ARRAYCPY_DEST_ELEMENT20:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP100]] to i32 -// CHECK1-NEXT: [[TMP101:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP101]] to i32 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV]], [[CONV17]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], align 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT20]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST16]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST15:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE1]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT19:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP99:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP99]] to i32 +// CHECK1-NEXT: [[TMP100:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV16:%.*]] = sext i8 [[TMP100]] to i32 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[CONV]], [[CONV16]] +// CHECK1-NEXT: [[CONV18:%.*]] = trunc i32 [[ADD17]] to i8 +// CHECK1-NEXT: store i8 [[CONV18]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], align 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT19]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST15]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE21:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT20]], [[TMP99]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_DONE22]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done22: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE20:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT19]], [[TMP98]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE20]], label 
[[OMP_ARRAYCPY_DONE21]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done21: // CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB4]], i32 [[TMP94]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP102:%.*]] = load i32, ptr [[ARGC1]], align 4 -// CHECK1-NEXT: [[TMP103:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP102]] monotonic, align 4 -// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr i8, ptr [[ARRAYIDX2]], i64 [[TMP13]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY23:%.*]] = icmp eq ptr [[ARRAYIDX2]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY23]], label [[OMP_ARRAYCPY_DONE36:%.*]], label [[OMP_ARRAYCPY_BODY24:%.*]] -// CHECK1: omp.arraycpy.body24: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST25:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT34:%.*]], [[ATOMIC_EXIT:%.*]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST26:%.*]] = phi ptr [ [[ARRAYIDX2]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT33:%.*]], [[ATOMIC_EXIT]] ] -// CHECK1-NEXT: [[TMP105:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV27:%.*]] = sext i8 [[TMP105]] to i32 -// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]] monotonic, align 1 +// CHECK1-NEXT: [[TMP101:%.*]] = load i32, ptr [[ARGC]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP101]] monotonic, align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr i8, ptr [[ARRAYIDX1]], i64 [[TMP20]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY22:%.*]] = icmp eq ptr [[ARRAYIDX1]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY22]], label [[OMP_ARRAYCPY_DONE35:%.*]], label [[OMP_ARRAYCPY_BODY23:%.*]] +// CHECK1: omp.arraycpy.body23: +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST24:%.*]] = phi ptr [ [[VLA]], [[DOTOMP_REDUCTION_CASE2]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT33:%.*]], [[ATOMIC_EXIT:%.*]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST25:%.*]] = phi ptr [ [[ARRAYIDX1]], [[DOTOMP_REDUCTION_CASE2]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT32:%.*]], [[ATOMIC_EXIT]] ] +// CHECK1-NEXT: [[TMP104:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 +// CHECK1-NEXT: [[CONV26:%.*]] = sext i8 [[TMP104]] to i32 +// CHECK1-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]] monotonic, align 1 // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP106:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY24]] ], [ [[TMP111:%.*]], [[ATOMIC_CONT]] ] -// CHECK1-NEXT: store i8 [[TMP106]], ptr [[_TMP28]], align 1 -// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[_TMP28]], align 1 +// CHECK1-NEXT: [[TMP105:%.*]] = phi i8 [ [[ATOMIC_LOAD]], [[OMP_ARRAYCPY_BODY23]] ], [ [[TMP110:%.*]], [[ATOMIC_CONT]] ] +// CHECK1-NEXT: store i8 [[TMP105]], ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[TMP106:%.*]] = load i8, ptr [[_TMP27]], align 1 +// CHECK1-NEXT: [[CONV28:%.*]] = sext i8 [[TMP106]] to i32 +// CHECK1-NEXT: [[TMP107:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], align 1 // CHECK1-NEXT: [[CONV29:%.*]] = sext i8 [[TMP107]] to i32 -// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], align 1 -// CHECK1-NEXT: [[CONV30:%.*]] = sext i8 [[TMP108]] to i32 -// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i32 [[CONV29]], [[CONV30]] -// CHECK1-NEXT: [[CONV32:%.*]] = trunc i32 [[ADD31]] to i8 -// CHECK1-NEXT: store i8 [[CONV32]], ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP109:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 -// CHECK1-NEXT: [[TMP110:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i8 [[TMP106]], i8 [[TMP109]] monotonic monotonic, align 1 -// CHECK1-NEXT: [[TMP111]] = extractvalue { i8, i1 } [[TMP110]], 0 -// CHECK1-NEXT: [[TMP112:%.*]] = extractvalue { i8, i1 } [[TMP110]], 1 -// CHECK1-NEXT: br i1 
[[TMP112]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[CONV28]], [[CONV29]] +// CHECK1-NEXT: [[CONV31:%.*]] = trunc i32 [[ADD30]] to i8 +// CHECK1-NEXT: store i8 [[CONV31]], ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP108:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 +// CHECK1-NEXT: [[TMP109:%.*]] = cmpxchg ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i8 [[TMP105]], i8 [[TMP108]] monotonic monotonic, align 1 +// CHECK1-NEXT: [[TMP110]] = extractvalue { i8, i1 } [[TMP109]], 0 +// CHECK1-NEXT: [[TMP111:%.*]] = extractvalue { i8, i1 } [[TMP109]], 1 +// CHECK1-NEXT: br i1 [[TMP111]], label [[ATOMIC_EXIT]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST26]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT34]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST25]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE35:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT33]], [[TMP104]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_DONE36]], label [[OMP_ARRAYCPY_BODY24]] -// CHECK1: omp.arraycpy.done36: +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT32]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST25]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT33]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST24]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE34:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT32]], [[TMP103]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE34]], label [[OMP_ARRAYCPY_DONE35]], label [[OMP_ARRAYCPY_BODY23]] +// CHECK1: omp.arraycpy.done35: // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: -// CHECK1-NEXT: [[TMP113:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK1-NEXT: call void @llvm.stackrestore(ptr [[TMP113]]) +// CHECK1-NEXT: [[TMP112:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK1-NEXT: call void 
@llvm.stackrestore(ptr [[TMP112]]) // CHECK1-NEXT: ret void // // @@ -683,8 +703,8 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -695,12 +715,12 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP2]], align 4 // CHECK1-NEXT: ret void // // @@ -795,59 +815,59 @@ // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) // CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr @.omp_task_privates_map., ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: call void [[TMP13]](ptr [[TMP14]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 -// CHECK1-NEXT: [[TMP22:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP19]], ptr [[TMP18]]) -// 
CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = sext i32 [[TMP28]] to i64 -// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP29]] -// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[TMP30]], align 8 -// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP31]], i64 9 -// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 -// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP32]], i64 [[LB_ADD_LEN_I]] -// CHECK1-NEXT: [[TMP33:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 -// CHECK1-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP35:%.*]] = sub i64 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 -// CHECK1-NEXT: [[TMP39:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], ptr [[TMP39]], ptr [[TMP25]]) -// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP12]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[TMP41]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr 
[[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP45:%.*]] = ptrtoint ptr [[TMP25]] to i64 -// CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) -// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, ptr [[TMP40]], i64 [[TMP47]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: [[TMP17:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP15]], ptr [[TMP14]]) +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[LB_ADD_LEN_I:%.*]] = add nsw i64 -1, [[TMP24]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], 
ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds ptr, ptr [[TMP26]], i64 9 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[ARRAYIDX2_I]], align 8 +// CHECK1-NEXT: [[ARRAYIDX3_I:%.*]] = getelementptr inbounds i8, ptr [[TMP27]], i64 [[LB_ADD_LEN_I]] +// CHECK1-NEXT: [[TMP28:%.*]] = ptrtoint ptr [[ARRAYIDX3_I]] to i64 +// CHECK1-NEXT: [[TMP29:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP30:%.*]] = sub i64 [[TMP28]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = sdiv exact i64 [[TMP30]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP32:%.*]] = add nuw i64 [[TMP31]], 1 +// CHECK1-NEXT: [[TMP33:%.*]] = mul nuw i64 [[TMP32]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: store i64 [[TMP32]], ptr @{{reduction_size[.].+[.]}}, align 8, !noalias !12 +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = call ptr @__kmpc_task_reduction_get_th_data(i32 [[TMP16]], ptr [[TMP34]], ptr [[TMP20]]) +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP9]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP37:%.*]] = load ptr, ptr [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load ptr, ptr [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +// CHECK1-NEXT: [[TMP40:%.*]] = ptrtoint ptr [[TMP20]] to i64 +// CHECK1-NEXT: [[TMP41:%.*]] = sub i64 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = sdiv exact i64 [[TMP41]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr i8, ptr [[TMP35]], i64 [[TMP42]] // CHECK1-NEXT: store ptr [[TMP4_I]], ptr [[TMP_I]], align 8, !noalias !12 -// CHECK1-NEXT: store ptr [[TMP48]], ptr [[TMP4_I]], align 8, !noalias !12 +// CHECK1-NEXT: store ptr [[TMP43]], ptr [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ 
-859,38 +879,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void @@ -904,38 +924,38 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP3]], i64 0, i64 1 +// CHECK1-NEXT: 
[[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[TMP2]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = ptrtoint ptr [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP18]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP15]], [[TMP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP14]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[TMP11]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi 
ptr [ [[TMP15]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP22]] to i32 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP23]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP18:%.*]] = load i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP18]] to i32 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP19]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // CHECK1-NEXT: store i8 [[CONV4]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP21]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done5: // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -427,83 +427,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-513,64 +527,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -578,83 +596,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-664,64 +696,68 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -729,83 +765,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-815,85 +865,89 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK1-NEXT: ret void // // @@ -901,83 +955,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-987,55 +1055,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1050,83 +1122,97 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1136,55 +1222,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1421,81 +1511,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1505,61 +1609,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1567,81 +1675,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1651,61 +1773,65 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1713,81 +1839,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1797,80 +1937,84 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr 
inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK3-NEXT: ret void // // @@ -1878,81 +2022,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-1962,52 +2120,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 
[[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2022,81 +2184,95 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2106,52 +2282,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 
[[TMP13]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: @@ -2388,83 +2568,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2474,64 +2668,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2539,83 +2737,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2625,64 +2837,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: 
omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2690,83 +2906,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2776,85 +3006,89 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK5-NEXT: ret void // // @@ -2862,83 +3096,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-2948,55 +3196,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3011,83 +3263,97 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-3097,55 +3363,59 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3382,81 +3652,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-3466,61 +3750,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// 
CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3528,81 +3816,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-3612,61 +3914,65 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 
-// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3674,81 +3980,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-3758,80 +4078,84 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 
[[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4 -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr 
inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK7-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK7: omp.dispatch.end: -// CHECK7-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK7-NEXT: ret void // // @@ -3839,81 +4163,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-3923,52 +4261,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 
+// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -3983,81 +4325,95 @@ // CHECK7-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK7-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK7-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK7: cond.true: // CHECK7-NEXT: br label [[COND_END:%.*]] // CHECK7: cond.false: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK7-NEXT: br label [[COND_END]] // CHECK7: cond.end: -// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK7-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK7-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK7-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK7-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: -// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK7-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void // // // CHECK7-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK7-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK7-NEXT: entry: // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK7-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ 
-4067,52 +4423,56 @@ // CHECK7-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK7-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK7-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK7-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK7: omp.dispatch.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK7-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK7-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK7: omp.dispatch.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 
+// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK7-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: @@ -4546,23 +4906,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4572,90 +4937,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: 
store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] 
// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4668,84 +5046,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// 
CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4757,23 +5139,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 
[[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4783,90 +5170,103 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = 
icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// 
CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -4879,84 +5279,88 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// 
CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -4970,7 +5374,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, 
align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -4979,22 +5383,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5004,121 +5412,135 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store 
ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, 
ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK13-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5131,85 +5553,91 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: 
store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// 
CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load 
i32, ptr [[I5]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void @@ -5221,23 +5649,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5247,90 +5680,103 @@ // CHECK13-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 
+// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: 
store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5343,73 +5789,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] 
= trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], 
i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP15]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5430,7 +5880,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // 
CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5439,22 +5889,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5464,96 +5918,110 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: 
-// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]) +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5566,75 +6034,81 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// 
CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] 
= sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: 
omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -5872,83 +6346,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5958,63 +6446,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], 
ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6022,83 +6514,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6108,63 +6614,67 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], 
ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK13-NEXT: ret void // // @@ -6174,97 +6684,111 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: 
cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 
+// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6274,86 +6798,92 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], 
align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label 
[[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -6361,83 +6891,97 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6447,54 +6991,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], 
ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: 
omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6511,97 +7059,111 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK13-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6611,56 +7173,62 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// 
CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7098,23 +7666,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7124,88 +7697,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7218,81 +7804,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] 
+// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7304,23 +7894,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], 
align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7330,88 +7925,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 
// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: 
call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7424,81 +8032,85 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7512,7 +8124,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7521,22 +8133,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7546,119 +8162,133 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: 
[[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK15-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK15-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK15-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK15-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7671,82 +8301,88 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], 
ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK15-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void @@ -7758,23 +8394,28 @@ // CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7784,88 +8425,101 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br 
i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7878,70 +8532,74 @@ // CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK15-NEXT: 
[[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, 
!llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -7962,7 +8620,7 @@ // CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -7971,22 +8629,26 @@ // CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -7996,94 +8658,108 @@ // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: 
-// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK15-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK15-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK15-NEXT: br label [[OMP_PRECOND_END]] // CHECK15: omp.precond.end: // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { 
+// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8096,72 +8772,78 @@ // CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK15-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK15-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK15: omp.precond.then: // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK15-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], 
[[TMP19]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK15-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -8399,81 +9081,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// 
CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8483,60 +9179,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8544,81 +9244,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] 
= alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) 
+// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8628,60 +9342,64 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK15-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK15-NEXT: ret void // // @@ -8691,95 +9409,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: 
cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8789,81 +9521,87 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: 
cond.true: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP21]], [[TMP22]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK15-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // 
CHECK15-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK15: omp.dispatch.end: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -8871,81 +9609,95 @@ // CHECK15-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -8955,51 +9707,55 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK15-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9016,95 +9772,109 @@ // CHECK15-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK15-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK15-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK15: cond.true: // CHECK15-NEXT: br label [[COND_END:%.*]] // CHECK15: 
cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: br label [[COND_END]] // CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK15-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK15-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK15-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK15-NEXT: ret void // // // CHECK15-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK15-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK15-NEXT: entry: // CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9114,53 +9884,59 @@ // CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK15-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK15-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK15-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK15-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK15-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 
[[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK15-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK15-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK15: omp.dispatch.body: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, 
i32 [[TMP13]] +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK15-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: @@ -9594,23 +10370,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9620,90 +10401,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9716,84 +10510,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], 
align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// 
CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -9805,23 +10603,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 
[[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9831,90 +10634,103 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = 
icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// 
CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr 
noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -9927,84 +10743,88 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// 
CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10018,7 +10838,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, 
align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10027,22 +10847,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10052,121 +10876,135 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]) +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK17-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK17-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store 
ptr [[TMP2]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, 
ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10179,85 +11017,91 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: 
store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// 
CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP29:%.*]] = load 
i32, ptr [[I5]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void @@ -10269,23 +11113,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10295,90 +11144,103 @@ // CHECK17-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 
+// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, 
ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// 
CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK17-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: 
store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10391,73 +11253,77 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] 
= trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10478,7 +11344,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK17-NEXT: store i64 
[[VLA]], ptr [[VLA_ADDR]], align 8 @@ -10487,22 +11353,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10512,96 +11382,110 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: 
-// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK17-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK17-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8 +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8 +// CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) // CHECK17-NEXT: br label [[OMP_PRECOND_END]] // CHECK17: omp.precond.end: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -10614,75 +11498,81 @@ // CHECK17-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 
-// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: 
[[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: 
omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -10920,83 +11810,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11006,63 +11910,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 
[[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], 
align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11070,83 +11978,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11156,63 +12078,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 
[[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], 
align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK17-NEXT: ret void // // @@ -11222,97 +12148,111 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 
+// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11322,86 +12262,92 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 
[[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], 
align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label 
[[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK17-NEXT: ret void // // @@ -11409,83 +12355,97 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]) +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11495,54 +12455,58 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 
[[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: 
omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11559,97 +12523,111 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK17-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: 
call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -11659,56 +12637,62 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 
-// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// 
CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12146,23 +13130,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12172,88 +13161,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12266,81 +13268,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] 
+// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12352,23 +13358,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12378,88 +13389,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca 
i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 
[[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// 
CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = 
icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] 
= alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12472,81 +13496,85 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // 
CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[I3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12560,7 +13588,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -12569,22 +13597,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12594,119 +13626,133 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: 
[[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: 
[[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK19-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -12719,82 +13765,88 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], 
ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi 
i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void @@ -12806,23 +13858,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12832,88 +13889,101 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br 
i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -12926,70 +13996,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group 
[[ACC_GRP16]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13010,7 +14084,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -13019,22 +14093,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13044,94 +14122,108 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: 
-// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) // CHECK19-NEXT: br label [[OMP_PRECOND_END]] // CHECK19: omp.precond.end: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { 
+// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13144,72 +14236,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: 
[[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// 
CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] 
+// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -13447,81 +14545,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13531,60 +14643,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -13592,81 +14708,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13676,60 +14806,64 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// 
CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) // CHECK19-NEXT: ret void // // @@ -13739,95 +14873,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -13837,81 +14985,87 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: 
cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // 
CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) // CHECK19-NEXT: ret void // // @@ -13919,81 +15073,95 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 
// CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14003,51 +15171,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP22]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14064,95 +15236,109 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 
+// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14162,53 +15348,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] 
+// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -355,6 +355,7 @@ // CHECK1-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -364,17 +365,20 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 
[[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -384,79 +388,94 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I3:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], 
align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i64 [[TMP16]], i64 [[TMP18]], ptr [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: 
[[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -469,14 +488,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -489,88 +505,92 @@ // CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// 
CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, 
ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -588,112 +608,134 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 
4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[I4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK1-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[I3]], ptr [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK1-NEXT: [[MUL:%.*]] 
= mul nsw i32 [[DIV8]], 1 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK1-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -702,129 +744,129 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP17]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP30]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK1-NEXT: store i32 [[ADD13]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK1-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK1-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK1-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: 
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK1-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: @@ -1014,6 +1056,7 @@ // CHECK3-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -1023,17 +1066,20 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1043,77 +1089,92 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..1, i32 [[TMP15]], i32 [[TMP16]], ptr [[TMP0]], ptr [[TMP1]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: 
[[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP30]], 0 // CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -1126,14 +1187,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1146,85 +1204,89 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 
-// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 
[[TMP16]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP5:%.*]] 
= icmp sle i32 [[TMP22]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP8]], i32 0, i32 [[TMP25]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP31]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1242,110 +1304,132 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], 
i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 
92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 
+// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[I4]], ptr [[TMP1]], ptr [[TMP2]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr 
[[I3]], ptr [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 +// CHECK3-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -1354,126 +1438,126 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, 
align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 
4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds 
[100 x i32], ptr [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTLINEAR_START]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP11]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP21]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP24]], [[COND_TRUE]] ], [ [[TMP25]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP27]], [[TMP28]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP29]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP10]], i32 0, i32 [[TMP30]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK3-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK3-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK3-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// 
CHECK3-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK3: .omp.linear.pu: // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: @@ -1867,116 +1951,141 @@ // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i64 16) ] // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[I4]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK9-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[I3]], ptr [[TMP28]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP29]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 
4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP30]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1985,131 +2094,130 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, 
align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 
[[TMP5]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK9-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], 
label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: -// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i64 16) ] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK9-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], 
[[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, 
ptr [[TMP12]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK9-NEXT: [[MUL12:%.*]] = mul nsw i32 
[[DIV11]], 1 -// CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK9-NEXT: store i32 [[ADD13]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK9-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: @@ -2235,114 +2343,139 @@ // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i32 16) ] // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[I4]], ptr [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[I3]], ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// 
CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -2351,128 +2484,127 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// 
CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK11-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK11-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: -// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK11-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i32 16) ] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 [[TMP32]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: 
omp.inner.for.inc: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK11-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK11-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK11-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK11-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK11-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 
4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK11-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK11: .omp.linear.pu: // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: @@ -2727,98 +2859,114 @@ // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// 
CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i64 [[TMP10]], i64 [[TMP12]], ptr [[I1]], ptr [[TMP1]]) +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17: omp.inner.for.body: +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK17-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK17-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[I]], ptr [[TMP18]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store ptr [[TMP4]], ptr [[TMP19]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, ptr [[TMP2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -2826,86 +2974,90 @@ // CHECK17-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I2:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 
+// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP8]], i32 0, i32 0 // CHECK17-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 0 // CHECK17-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTLINEAR_START]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[CONV:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP11]] to i32 // CHECK17-NEXT: store i32 [[CONV]], ptr 
[[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], ptr [[I2]], align 4 -// CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I2]], align 4 -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A5]], i64 0, i64 [[IDXPROM]] +// CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK17-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP8]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 
x i32], ptr [[A4]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK17-NEXT: store i32 123, ptr [[TMP6]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK17: .omp.final.done: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], 
align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: @@ -2994,96 +3146,112 @@ // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 2, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[I_ADDR]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP4]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 122 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // 
CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined..1, i32 [[TMP9]], i32 [[TMP10]], ptr [[I1]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[I]], ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store ptr [[TMP4]], ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, ptr [[TMP2]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[I:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 @@ -3091,83 +3259,87 @@ // CHECK19-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I1:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[I2:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP1]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 
+// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP8]], i32 0, i32 0 // CHECK19-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 0 // CHECK19-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTLINEAR_START]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store 
i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]]) -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP6]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP13]]) +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], ptr [[I1]], align 4 -// CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I1]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A4]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4 +// CHECK19-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP8]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A3]], i32 0, i32 [[TMP20]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: store i32 123, ptr [[TMP0]], align 4 +// CHECK19-NEXT: store i32 123, ptr [[TMP6]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK19: .omp.final.done: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 +// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK19: .omp.linear.pu: // CHECK19-NEXT: br label 
[[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: @@ -3424,116 +3596,141 @@ // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK25-NEXT: store i64 [[I]], ptr [[I_ADDR]], align 8 // CHECK25-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK25-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 
[[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i64 16) ] // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK25-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK25-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK25-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i64 [[TMP18]], i64 [[TMP20]], ptr [[I4]], ptr [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK25-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK25-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8 +// CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8 +// CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[I3]], ptr [[TMP28]], align 8 +// CHECK25-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK25-NEXT: store ptr [[TMP4]], ptr [[TMP29]], 
align 8 +// CHECK25-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK25-NEXT: store i64 [[TMP6]], ptr [[TMP30]], align 8 +// CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK25-NEXT: store ptr [[TMP8]], ptr [[TMP31]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) -// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK25-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK25-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK25-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: // CHECK25-NEXT: br label [[OMP_PRECOND_END]] @@ -3542,131 +3739,130 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, 
align 8 -// CHECK25-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: [[I6:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 
-// CHECK25-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK25-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK25-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK25-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK25-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK25-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK25-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, 
[[TMP6]] +// CHECK25-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK25-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK25: omp.precond.then: -// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i64 16) ] -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK25-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i64 16) ] +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK25-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK25-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP18]] to i32 +// CHECK25-NEXT: [[TMP19:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK25-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK25-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK25-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK25-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK25-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK25-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK25-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK25-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK25-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], ptr [[I5]], align 4 -// CHECK25-NEXT: [[TMP23:%.*]] = load i32, ptr [[I5]], align 4 -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]] +// CHECK25-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 +// CHECK25-NEXT: [[TMP32:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK25-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK25-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK25-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK25-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK25-NEXT: br i1 [[TMP37]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: -// CHECK25-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK25-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK25-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK25-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK25-NEXT: store i32 [[ADD13]], ptr [[TMP0]], align 4 +// CHECK25-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK25-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK25-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 +// CHECK25-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 +// CHECK25-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] +// CHECK25-NEXT: store i32 [[ADD12]], ptr [[TMP6]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK25: .omp.final.done: -// CHECK25-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK25-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK25-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK25-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK25: .omp.linear.pu: // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: @@ -3753,6 +3949,7 @@ // CHECK25-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK25-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK25-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -3761,73 +3958,88 
@@ // CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK25-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK25-NEXT: ret void // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK25-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK25-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = 
alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label 
[[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: [[TMP11:%.*]] = zext 
i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK25-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK25-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK25-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK25-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK25-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, ptr [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3836,13 +4048,11 @@ // // // CHECK25-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK25-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK25-NEXT: entry: // CHECK25-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK25-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK25-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3852,66 +4062,70 @@ // CHECK25-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK25-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK25-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK25-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK25-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK25-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK25-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK25-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK25-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK25-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK25-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK25-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK25-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK25-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK25: cond.true: // CHECK25-NEXT: br label [[COND_END:%.*]] // CHECK25: cond.false: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK25-NEXT: br label [[COND_END]] // CHECK25: cond.end: -// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK25-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK25-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK25-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK25-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK25-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK25-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK25-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK25-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK25-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: -// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK25-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK25-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK25-NEXT: [[TMP20:%.*]] = 
icmp ne i32 [[TMP19]], 0 +// CHECK25-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK25: .omp.final.then: // CHECK25-NEXT: store i32 10, ptr [[I]], align 4 // CHECK25-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4044,114 +4258,139 @@ // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4 // CHECK27-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 4, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[I_ADDR]], ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_0:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP8]], i32 16) ] // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP9]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: br i1 [[CMP5]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] +// CHECK27-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp 
sle i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 6, ptr @.omp_outlined..1, i32 [[TMP17]], i32 [[TMP18]], ptr [[I4]], ptr [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4 +// CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[I3]], ptr [[TMP26]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK27-NEXT: store ptr [[TMP4]], ptr [[TMP27]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK27-NEXT: store i32 [[TMP6]], ptr [[TMP28]], align 4 +// CHECK27-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK27-NEXT: store ptr [[TMP8]], ptr [[TMP29]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK27-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK27-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = icmp ne i32 
[[TMP34]], 0 +// CHECK27-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK27-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 -// CHECK27-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD9]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK27-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 +// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] +// CHECK27-NEXT: store i32 [[ADD8]], ptr [[TMP2]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: // CHECK27-NEXT: br label [[OMP_PRECOND_END]] @@ -4160,128 +4399,127 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: 
[[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTLINEAR_START:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK27-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK27-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK27-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK27-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK27-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK27-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK27-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK27-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK27: omp.precond.then: -// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP3]], i32 16) ] -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTLINEAR_START]], align 4 +// CHECK27-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP12]], i32 16) ] +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK27-NEXT: store i32 [[TMP16]], ptr [[DOTLINEAR_START]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK27-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP12]]) -// CHECK27-NEXT: [[TMP13:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK27-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] -// CHECK27-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK27-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK27-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP21]]) +// CHECK27-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP23]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], [[TMP25]] +// CHECK27-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], 
[ [[TMP18]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ [[TMP26]], [[COND_TRUE]] ], [ [[TMP27]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK27-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK27-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP29]], [[TMP30]] +// CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 +// CHECK27-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP31]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], ptr [[I4]], align 4 -// CHECK27-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 [[TMP23]] +// CHECK27-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 +// CHECK27-NEXT: [[TMP32:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 [[TMP32]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 
// CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK27-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], 1 +// CHECK27-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK27-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK27-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK27-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK27-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: -// CHECK27-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK27-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK27-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 -// CHECK27-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 -// CHECK27-NEXT: [[ADD12:%.*]] = add nsw i32 0, 
[[MUL11]] -// CHECK27-NEXT: store i32 [[ADD12]], ptr [[TMP0]], align 4 +// CHECK27-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK27-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK27-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK27-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK27-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK27-NEXT: store i32 [[ADD11]], ptr [[TMP6]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK27: .omp.final.done: -// CHECK27-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK27-NEXT: br i1 [[TMP31]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] +// CHECK27-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK27-NEXT: br i1 [[TMP40]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK27: .omp.linear.pu: // CHECK27-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK27: .omp.linear.pu.done: @@ -4368,6 +4606,7 @@ // CHECK27-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4]]) // CHECK27-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -4376,71 +4615,86 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[TMP1]]) +// CHECK27-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK27-NEXT: ret void // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK27-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: cond.false: -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK27-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK27-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP12]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK27-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK27-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, ptr [[I]], align 4 // CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4449,13 +4703,11 @@ // // // CHECK27-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK27-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK27-NEXT: entry: // CHECK27-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK27-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4465,63 +4717,67 @@ // CHECK27-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK27-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK27-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK27-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK27-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK27-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK27-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK27-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK27: cond.true: // CHECK27-NEXT: br label [[COND_END:%.*]] // CHECK27: 
cond.false: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK27-NEXT: br label [[COND_END]] // CHECK27: cond.end: -// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK27-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK27-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK27-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK27-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK27-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK27-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// 
CHECK27-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK27-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK27-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: -// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK27-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK27-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK27-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK27-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK27-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK27-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK27: .omp.final.then: // CHECK27-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK27-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -129,34 +129,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// 
CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 
1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -164,26 +164,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -191,58 +194,71 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -252,13 +268,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -270,79 +284,83 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // 
CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!llvm.access.group !8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // 
CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -380,34 +398,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: 
store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -415,26 +433,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret 
i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -442,56 +463,69 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_COMB_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !5 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr 
[[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -501,13 +535,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -519,75 +551,79 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 
+// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP9]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: 
[[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: 
store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -630,39 +666,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] 
= add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -701,37 +737,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 
4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, 
!llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -761,10 +797,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -789,91 +825,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x 
ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: 
store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: 
[[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// 
CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], 
align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr 
[[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86 @@ 
-884,6 +920,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -892,133 +929,160 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 
+// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: 
[[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: 
br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP21]], i64 [[TMP22]], ptr [[TMP0]], ptr [[TMP1]], i64 [[TMP2]], i64 [[TMP3]], ptr [[TMP4]]), !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP27]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr 
[[TMP2]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP6]], ptr [[TMP33]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: store i64 [[TMP8]], ptr [[TMP34]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[TMP10]], ptr [[TMP35]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK9-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // 
CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP39]]) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CHECK9-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK9-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK9-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK9-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK9-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK9-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 
+// CHECK9-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK9-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK9-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1027,172 +1091,170 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// 
CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
+// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK9-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // 
CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) 
+// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// 
CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP23]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP26]], [[CONV25]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP25]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK9-NEXT: 
[[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[TMP30:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP30]] -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP33]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK9-NEXT: 
[[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP36]], [[CONV23]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP35]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP39]] to i64 +// CHECK9-NEXT: [[TMP40:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP12]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP40]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP41]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !9 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK9-NEXT: store i64 [[ADD38]], ptr 
[[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv i32 [[SUB43]], 1 -// 
CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -1213,34 +1275,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1252,18 +1314,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1271,58 +1336,71 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr 
[[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group !14 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1332,13 +1410,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1350,78 +1426,82 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store 
i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr 
inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1482,91 +1562,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 
-// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 
-// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store 
i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86(i32 
[[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l86 @@ -1577,6 +1657,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1585,135 +1666,162 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // 
CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// 
CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr 
[[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP22]], i32 [[TMP24]], ptr [[TMP0]], ptr [[TMP1]], i32 [[TMP2]], i32 [[TMP3]], ptr [[TMP4]]), !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 +// CHECK11-NEXT: store i32 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i32 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP32]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP33]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP34]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[TMP35]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 
+// CHECK11-NEXT: store i32 [[TMP8]], ptr [[TMP36]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[TMP10]], ptr [[TMP37]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK11-NEXT: store i64 [[ADD]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP28]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP40:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP41]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP31]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 0, [[MUL17]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP32]], 0 -// CHECK11-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK11-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK11-NEXT: store i32 [[ADD22]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]] +// CHECK11-NEXT: store i32 [[ADD16]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP45]], 0 +// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 +// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 [[DIV18]], 1 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL19]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: 
.omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1722,172 +1830,170 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, 
align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I13:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J14:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], 
[[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP18]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP19]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP20]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[CONV11:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK11-NEXT: [[CONV12:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK11-NEXT: store i64 [[CONV11]], ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[CONV12]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP21]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[CONV9:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[CONV10:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK11-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP15]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP15:%.*]] = icmp sgt i64 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP15]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB2:[0-9]+]], i32 [[TMP25]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// 
CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP26]], [[TMP27]] +// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP28]], [[COND_TRUE]] ], [ [[TMP29]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP20]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP30]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: 
omp.inner.for.body: -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP24]], 0 -// CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 -// CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] -// CHECK11-NEXT: [[CONV20:%.*]] = sext i32 [[MUL19]] to i64 -// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i64 [[TMP23]], [[CONV20]] -// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] -// CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], ptr [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP27]], 0 -// CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 -// CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] -// CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP26]], [[CONV27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP28]], 0 -// CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] -// CHECK11-NEXT: [[CONV32:%.*]] = sext i32 [[MUL31]] to i64 -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[DIV28]], [[CONV32]] -// CHECK11-NEXT: [[SUB34:%.*]] = sub nsw i64 [[TMP25]], [[MUL33]] -// CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 -// CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] -// CHECK11-NEXT: [[CONV37:%.*]] = 
trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], ptr [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I13]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[TMP30:%.*]] = mul nsw i32 [[TMP29]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP30]] -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[J14]], align 4, !llvm.access.group !10 -// CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP31]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX38]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP33]], [[CONV18]] +// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP37]], 0 +// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK11-NEXT: 
[[DIV26:%.*]] = sdiv i64 [[TMP36]], [[CONV25]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP35]], [[MUL31]] +// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP40:%.*]] = mul nsw i32 [[TMP39]], [[TMP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP40]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP41]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP32]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP42]], 1 +// CHECK11-NEXT: store i64 [[ADD37]], ptr [[DOTOMP_IV]], align 8, 
!llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 -// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP37]], 0 -// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 -// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 -// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] -// CHECK11-NEXT: store i32 [[ADD43]], ptr [[I13]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP38]], 0 -// CHECK11-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1 -// CHECK11-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1 -// CHECK11-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]] -// CHECK11-NEXT: store i32 [[ADD47]], ptr [[J14]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 
4 +// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP47]], 0 +// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 +// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 +// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] +// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK11-NEXT: [[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 +// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 +// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] +// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1908,34 +2014,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 
0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l72(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1947,18 +2053,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1966,56 +2075,69 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group !15 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2025,13 +2147,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2043,74 +2163,78 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br 
label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP16]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!llvm.access.group !18 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP18]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP17]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP6]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP20]] +// 
CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -2196,13 +2320,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: 
[[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -2211,16 +2335,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2230,22 +2354,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr 
[[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2290,38 +2414,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store 
i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 
// CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -2397,13 +2521,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ 
-2412,16 +2536,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -2431,20 +2555,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr 
[[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -2489,36 +2613,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 
+// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -287,75 +287,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -364,13 +380,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -380,67 +394,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -452,75 +470,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -529,13 +563,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -545,67 +577,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -617,95 +653,111 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca 
i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 
[[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i64 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] // CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -714,13 +766,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -730,67 +780,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: 
store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -946,73 +1000,89 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], 
align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // 
CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1021,13 +1091,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1037,64 +1105,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: 
[[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] 
= icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) 
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1106,73 +1178,89 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1181,13 +1269,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1197,64 +1283,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 
4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1266,93 +1356,109 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP8]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP7]], i32 [[TMP8]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] // CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1361,13 +1467,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1377,64 +1481,68 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 
4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1958,23 +2066,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1984,81 +2097,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr 
[[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 
[[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
+// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2071,15 +2201,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2092,90 +2218,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], 
align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 
[[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// 
CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2193,23 +2323,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2219,81 +2354,98 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, 
ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK9-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK9-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// 
CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -2306,15 +2458,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2327,90 +2475,94 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // 
CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] 
-// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK9-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -2430,7 +2582,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -2439,22 +2591,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2464,111 +2620,129 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // 
CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 
91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK9-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// 
CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK9-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK9: cond.true11: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13:%.*]] // CHECK9: cond.false12: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[COND_END13]] // CHECK9: cond.end13: -// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], [[COND_FALSE12]] ] +// CHECK9-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK9-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK9-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK9-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK9-NEXT: [[ADD17:%.*]] = add nsw 
i32 0, [[MUL]] @@ -2581,16 +2755,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2603,91 +2773,97 @@ // CHECK9-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr 
[[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // 
CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK9-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK9-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -2838,75 
+3014,91 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] 
= alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2915,13 +3107,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2931,66 +3121,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // 
CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3002,75 +3196,91 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
+// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3079,13 +3289,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3095,66 +3303,70 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // 
CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3168,109 +3380,125 @@ // CHECK9-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 9 // CHECK9-NEXT: br i1 [[CMP]], 
label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP42]] -// 
CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP27]], 9 // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK9: cond.true5: // CHECK9-NEXT: br label [[COND_END7:%.*]] // CHECK9: cond.false6: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[COND_END7]] // CHECK9: cond.end7: -// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ] +// CHECK9-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP28]], [[COND_FALSE6]] ] // CHECK9-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load 
i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK9-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3279,14 +3507,12 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3296,67 +3522,73 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc 
i64 [[TMP10]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: 
omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3635,23 +3867,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3661,79 +3898,96 @@ // CHECK11-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3746,15 +4000,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3767,87 +4017,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr 
[[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], 
[[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // 
CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3865,23 +4119,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3891,79 +4150,96 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: 
.omp.final.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -3976,15 +4252,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3997,87 +4269,91 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -4097,7 +4373,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4106,22 +4382,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4131,109 +4411,127 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK11-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP25]] +// 
CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK11-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP31:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK11-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -4246,16 +4544,12 @@ 
// // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4268,88 +4562,94 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 
+// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK11-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -4500,73 +4800,89 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // 
CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // 
CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4575,13 +4891,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4591,63 +4905,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: 
cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4659,73 +4977,89 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP37]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK11-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4734,13 +5068,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4750,63 +5082,67 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: 
cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// 
CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4820,107 +5156,123 @@ // CHECK11-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..18, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP3]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP11]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr 
[[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP25]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP26]], [[COND_FALSE6]] ] // CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], 
i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4929,14 +5281,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4946,64 +5296,70 @@ // CHECK11-NEXT: [[I:%.*]] = alloca 
i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP12]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP19]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: 
omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -379,8 +379,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: 
[[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -389,140 +388,163 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: 
omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: store i32 [[TMP17]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i64 [[TMP12]], i64 [[TMP14]], ptr [[VEC1]], i64 [[TMP16]], ptr [[S_ARR2]], ptr [[VAR4]], i64 [[TMP18]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK1-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: 
[[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK1-NEXT: store ptr [[VAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 +// CHECK1-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done8: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done5: // CHECK1-NEXT: ret void // // @@ -560,144 +582,148 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, 
ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 8 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: 
[[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// 
CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[VAR5]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP9]] 
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -705,10 +731,10 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -719,9 +745,9 @@ // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -785,12 +811,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -873,7 +899,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -882,23 +908,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -906,114 +936,132 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: store i32 [[TMP16]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP14]] -// 
CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC2]], i64 [[TMP17]], ptr [[S_ARR3]], ptr [[TMP18]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +// CHECK1-NEXT: store i64 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK1-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[VEC]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8, !llvm.access.group 
[[ACC_GRP14]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, 
ptr [[TMP32]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// 
CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done10: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done7: // CHECK1-NEXT: ret void // // @@ -1031,16 +1079,12 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1048,127 +1092,133 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.2], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP7:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_2:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP4:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP3]] to i32 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC3]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_2]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 
1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done5: -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP7]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP7]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR6]], ptr [[_TMP8]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_2]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 
dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP4]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP5]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP9]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP8]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM10]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP17]], i64 4, i1 
false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i64 0, i64 [[IDXPROM7]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.2], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done14: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done11: // CHECK1-NEXT: ret void // // @@ -1188,7 +1238,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -1202,7 +1252,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1218,9 +1268,9 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_2:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_2]], ptr [[TMP0]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1454,8 +1504,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1464,138 +1513,161 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 
-// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: 
call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: 
omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..3, i32 [[TMP11]], i32 [[TMP12]], ptr [[VEC1]], i32 [[TMP14]], ptr [[S_ARR2]], ptr [[VAR4]], i32 [[TMP16]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store 
ptr [[VAR]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label 
[[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done8: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done5: // CHECK3-NEXT: ret void // // @@ -1633,140 +1705,144 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// 
CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], 
i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP17]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP12]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: 
br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK3-NEXT: store i32 [[TMP26]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// 
CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP36]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1774,10 +1850,10 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -1788,9 +1864,9 @@ // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr 
inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -1854,12 +1930,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1942,7 +2018,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1951,23 +2027,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1975,112 +2055,130 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// 
CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..5, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC2]], i32 [[TMP15]], ptr [[S_ARR3]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[VEC]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: 
[[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP23]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done10: +// CHECK3-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done7: // CHECK3-NEXT: ret void // // @@ -2098,16 +2196,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2115,123 +2209,129 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x 
%struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: [[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP6]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP10]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] -// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP6]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP15]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP16]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP19]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP18]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP24]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP26]] +// CHECK3-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX]], align 
4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP28]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK3-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP34]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2251,7 +2351,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -2265,7 +2365,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2281,9 +2381,9 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// 
CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -3093,35 +3193,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 
[[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3129,72 +3227,85 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store 
i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], 
align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK9-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr @g, align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr 
[[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, 
ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3203,15 +3314,14 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -3220,79 +3330,89 @@ // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP2]], align 8 +// 
CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP11]] to i32 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP15]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr 
[[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -173,70 +173,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr 
[[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -245,12 +258,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr 
noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -260,60 +272,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -324,75 +340,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -401,12 +430,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -416,43 +444,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -460,17 +492,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -595,70 +627,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -667,12 +712,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -682,43 +726,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -726,17 +774,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -747,75 +795,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -824,12 +885,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -839,43 +899,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -883,17 +947,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -906,97 +970,113 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1005,12 +1085,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -1020,43 +1099,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -1064,17 +1147,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1197,70 +1280,83 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: 
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1269,12 +1365,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1284,43 +1379,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] 
= trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -1328,17 +1427,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP47]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1349,75 +1448,88 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: 
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1426,12 +1538,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1441,43 +1552,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -1485,17 +1600,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1508,97 +1623,113 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK1-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK1-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK1-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1607,12 +1738,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -1622,43 +1752,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -1666,17 +1800,17 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1754,70 +1888,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK3-SAME: () #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: 
-// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1826,12 +1973,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1841,60 +1987,64 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] 
] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1905,75 +2055,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1982,12 +2145,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1997,43 +2159,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -2041,17 +2207,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: 
.omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2176,70 +2342,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2248,12 +2427,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2263,43 +2441,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -2307,17 +2489,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2328,75 +2510,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2405,12 +2600,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2420,43 +2614,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 
8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ 
[[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn5v() @@ -2464,17 +2662,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2487,151 +2685,172 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, ptr 
[[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: 
[[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK3: omp_if.then4: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP19]] to i1 
+// CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3: omp_if.then6: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @.omp_outlined..9(ptr [[TMP20]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK3: omp_if.else5: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK3: omp.inner.for.cond6: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK3: omp.inner.for.body8: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK3-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK3: omp_if.then13: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK3-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK3: omp_if.else14: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: br label [[OMP_IF_END16]] -// CHECK3: omp_if.end16: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK3: omp.inner.for.inc17: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK3-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK3: omp.inner.for.end19: -// CHECK3-NEXT: br label [[OMP_IF_END20]] -// CHECK3: omp_if.end20: +// CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK3: omp_if.else7: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK3: omp.inner.for.cond8: +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK3: omp.inner.for.body10: +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK3-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// 
CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK3-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP31]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK3: omp_if.then15: +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK3-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK3: omp_if.else16: +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..10(ptr [[TMP34]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK3-NEXT: br label [[OMP_IF_END18]] +// CHECK3: omp_if.end18: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK3: omp.inner.for.inc19: +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK3: omp.inner.for.end21: +// CHECK3-NEXT: br label [[OMP_IF_END22]] +// CHECK3: omp_if.end22: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK3-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 
100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2640,13 +2859,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2656,48 +2874,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -2705,60 +2931,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, 
i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// 
CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2767,13 +2993,12 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2783,48 +3008,56 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: 
store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr 
[[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -2832,60 +3065,60 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label 
[[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK3: omp.inner.for.cond9: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK3: omp.inner.for.body11: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// 
CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK3: omp.inner.for.cond10: +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK3: omp.inner.for.body12: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn6v() -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK3: omp.body.continue14: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK3: omp.inner.for.inc15: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK3: omp.inner.for.end17: +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK3: omp.body.continue15: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK3: omp.inner.for.inc16: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label 
[[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3008,70 +3241,83 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], 
ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3080,12 +3326,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK3-SAME: 
(ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3095,43 +3340,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// 
CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -3139,17 +3388,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK3-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3160,75 +3409,88 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK3-SAME: () #[[ATTR1]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], 
align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 
[[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3237,12 +3499,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias 
noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3252,43 +3513,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi 
i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK3-NEXT: call void @_Z3fn2v() @@ -3296,17 +3561,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3319,97 +3584,113 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK3-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK3-NEXT: 
[[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, i64 [[TMP2]]) +// CHECK3-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 
[[TMP11]] to i1 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK3-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK3-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK3-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3418,12 +3699,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -3433,43 +3713,47 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK3-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK3-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -3477,17 +3761,17 @@ // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 100, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4156,70 +4440,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK9-SAME: () #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = 
alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: 
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4228,12 +4525,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4243,60 +4539,64 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] 
] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4307,75 +4607,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4384,12 +4697,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4399,43 +4711,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -4443,17 +4759,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4578,70 +4894,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// 
CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4650,12 +4979,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4665,43 +4993,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = 
trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -4709,17 +5041,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP29]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4730,75 +5062,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// 
CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4807,12 +5152,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4822,43 +5166,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group [[ACC_GRP35]] @@ -4866,17 +5214,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4889,97 +5237,113 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @.omp_outlined..9(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4988,12 +5352,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -5003,43 +5366,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP41]] @@ -5047,17 +5414,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5180,70 +5547,83 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..10) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: 
-// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5252,12 +5632,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5267,43 +5646,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] 
= trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // 
CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP47]] @@ -5311,17 +5694,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP47]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5332,75 +5715,88 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK9-SAME: () #[[ATTR1]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..12) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK9-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @.omp_outlined..13(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: 
-// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5409,12 +5805,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5424,43 +5819,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group [[ACC_GRP53]] @@ -5468,17 +5867,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: 
.omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5491,97 +5890,113 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK9-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, i64 [[TMP2]]) +// CHECK9-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK9-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = 
phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK9-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..15, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @.omp_outlined..15(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5590,12 +6005,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 @@ -5605,43 +6019,47 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 
[[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP59]] @@ -5649,17 +6067,17 @@ // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 100, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5737,70 +6155,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l43 // CHECK11-SAME: () #[[ATTR1:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // 
CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // 
CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5809,12 +6240,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5824,60 +6254,64 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], 
[[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD3:%.*]] = 
add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5888,75 +6322,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9gtid_testv_l47 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group 
[[ACC_GRP20]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @.omp_outlined..3(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: 
omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5965,12 +6412,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -5980,43 +6426,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group [[ACC_GRP23]] @@ -6024,17 +6474,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 
+// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6159,70 +6609,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l76 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_4:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], 
[ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6231,12 +6694,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6246,43 +6708,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group [[ACC_GRP29]] @@ -6290,17 +6756,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// 
CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6311,75 +6777,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l84 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// 
CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..7(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6388,12 +6867,11 @@ // // // CHECK11-LABEL: 
define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6403,43 +6881,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], 
align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // 
CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn5v() @@ -6447,17 +6929,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 
[[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6470,151 +6952,172 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: 
[[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, i64 [[TMP2]]) +// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// 
CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR15:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_11:%.*]] = alloca [[STRUCT_ANON_8]], align 8 +// CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE5:%.*]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE7:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] // CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 -// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL3]], label [[OMP_IF_THEN4:%.*]], label [[OMP_IF_ELSE:%.*]] -// CHECK11: omp_if.then4: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..9, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 +// CHECK11-NEXT: store i64 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK11-NEXT: store i64 [[TMP14]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP18:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP18]] to i1 +// CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TOBOOL5:%.*]] = 
trunc i8 [[TMP19]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11: omp_if.then6: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP15]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @.omp_outlined..9(ptr [[TMP20]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: 
[[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_IF_END20:%.*]] -// CHECK11: omp_if.else5: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6:%.*]] -// CHECK11: omp.inner.for.cond6: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY8:%.*]], label [[OMP_INNER_FOR_END19:%.*]] -// CHECK11: omp.inner.for.body8: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL9:%.*]] = trunc i8 [[TMP24]] to i1 -// CHECK11-NEXT: [[FROMBOOL11:%.*]] = zext i1 [[TOBOOL9]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL11]], ptr [[DOTCAPTURE_EXPR__CASTED10]], align 1 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED10]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP26]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL12]], label [[OMP_IF_THEN13:%.*]], label [[OMP_IF_ELSE14:%.*]] -// CHECK11: omp_if.then13: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) -// CHECK11-NEXT: br label [[OMP_IF_END16:%.*]] -// CHECK11: omp_if.else14: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR15]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP27]], ptr [[DOTBOUND_ZERO_ADDR15]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: br label [[OMP_IF_END16]] -// CHECK11: omp_if.end16: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] -// CHECK11: omp.inner.for.inc17: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND6]], !llvm.loop [[LOOP38:![0-9]+]] -// CHECK11: omp.inner.for.end19: -// CHECK11-NEXT: br label [[OMP_IF_END20]] -// CHECK11: omp_if.end20: +// CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] +// CHECK11: omp_if.else7: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] +// CHECK11: omp.inner.for.cond8: +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END21:%.*]] +// CHECK11: omp.inner.for.body10: +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK11-NEXT: store i64 [[TMP26]], ptr 
[[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +// CHECK11-NEXT: store i64 [[TMP28]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP29]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP30]], align 8 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_11]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP32:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL12:%.*]] = trunc i8 [[TMP32]] to i1 +// CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL12]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL13]], ptr [[TMP31]], align 8 +// CHECK11-NEXT: [[TMP33:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP33]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] +// CHECK11: omp_if.then15: +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_11]]) +// CHECK11-NEXT: br label [[OMP_IF_END18:%.*]] +// CHECK11: omp_if.else16: +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR17]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..10(ptr [[TMP34]], ptr [[DOTBOUND_ZERO_ADDR17]], ptr [[OMP_OUTLINED_ARG_AGG_11]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]) +// CHECK11-NEXT: br label [[OMP_IF_END18]] +// CHECK11: omp_if.end18: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]] +// CHECK11: omp.inner.for.inc19: +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK11-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK11: omp.inner.for.end21: +// CHECK11-NEXT: br label [[OMP_IF_END22]] +// CHECK11: omp_if.end22: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 -// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: 
// CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6623,13 +7126,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6639,48 +7141,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// 
CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP39]] @@ -6688,60 +7198,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 
[[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP42:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6750,13 +7260,12 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6766,48 +7275,56 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP11]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group [[ACC_GRP43]] @@ -6815,60 +7332,60 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP14]], 99 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 99, [[COND_TRUE5]] ], [ [[TMP15]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] -// CHECK11: omp.inner.for.cond9: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK11: omp.inner.for.body11: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP22]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10:%.*]] +// CHECK11: omp.inner.for.cond10: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY12:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK11: omp.inner.for.body12: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[TMP26]], 1 +// CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] +// CHECK11-NEXT: store i32 [[ADD14]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn6v() -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK11: omp.body.continue14: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK11: omp.inner.for.inc15: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP46:![0-9]+]] -// CHECK11: omp.inner.for.end17: +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK11: 
omp.body.continue15: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK11: omp.inner.for.inc16: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK11-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6991,70 +7508,83 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l57 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..11) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, 
ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..12, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..12, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7063,12 +7593,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..12 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7078,43 +7607,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group [[ACC_GRP50]] @@ -7122,17 +7655,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// 
CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7143,75 +7676,88 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiEiT__l62 // CHECK11-SAME: () #[[ATTR1]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..13) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK11-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// 
CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP11]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @.omp_outlined..14(ptr [[TMP14]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7220,12 +7766,11 @@ // // // 
CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7235,43 +7780,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br 
label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK11-NEXT: call void @_Z3fn2v() @@ -7279,17 +7828,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // 
CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7302,97 +7851,113 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[ARG_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 1 // CHECK11-NEXT: store i64 [[ARG]], ptr [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARG_ADDR]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP1]] to i1 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// 
CHECK11-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK11-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, i64 [[TMP2]]) +// CHECK11-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP1]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca 
[[STRUCT_ANON_14:%.*]], align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 -// CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK11-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP16]] to i1 +// CHECK11-NEXT: br i1 [[TOBOOL2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..16, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..16, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP12]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @.omp_outlined..16(ptr [[TMP17]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]], !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB3]], i32 [[TMP4]]), !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP55]] +// 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK11-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7401,12 +7966,11 @@ // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..16 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, 
align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7416,43 +7980,47 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK11-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK11-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK11-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK11-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK11-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK11-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[TMP2]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] 
-// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group [[ACC_GRP58]] @@ -7460,17 +8028,17 @@ // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// 
CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 100, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -210,25 +210,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -236,105 +242,119 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i64 [[TMP13]], i64 [[TMP15]], ptr [[G2]], ptr [[TMP16]], ptr [[SVAR5]], ptr [[SFVAR6]]), !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK1-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK1-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK1-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label 
[[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK1-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP23]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP24]], align 8 -// CHECK1-NEXT: store volatile double [[TMP25]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP27]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP34]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load double, ptr [[TMP35]], align 8 +// CHECK1-NEXT: store volatile double [[TMP36]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP37]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP38]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -342,112 +362,116 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G3:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G14:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP5:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR7:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = 
alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G14]], ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// 
CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR6]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR7]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G3]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP5]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR6]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR7]], ptr [[TMP20]], align 8, 
!llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: 
[[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK1-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK1-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[G3]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 8 -// 
CHECK1-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR6]], align 4 -// CHECK1-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR7]], align 4 -// CHECK1-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 8 +// CHECK1-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -489,6 +513,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -502,20 +527,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -523,103 +553,117 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, 
align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: 
[[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..1, i32 [[TMP12]], i32 [[TMP13]], ptr [[G2]], ptr [[TMP14]], ptr [[SVAR5]], ptr [[SFVAR6]]), !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr 
[[TMP25]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK3-NEXT: br i1 [[TMP31]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP21]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 4 -// CHECK3-NEXT: store volatile double [[TMP23]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP25]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP32]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load double, ptr [[TMP33]], align 4 +// CHECK3-NEXT: store volatile double [[TMP34]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP35]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP36]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr 
noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -627,110 +671,114 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 +// 
CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store volatile 
double 1.000000e+00, ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK3-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK3-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load double, ptr [[TMP27]], align 4 -// CHECK3-NEXT: store volatile double [[TMP28]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load float, ptr [[SFVAR6]], 
align 4 -// CHECK3-NEXT: store float [[TMP30]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP35]], ptr [[TMP6]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store volatile double [[TMP37]], ptr [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP38]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP39]], ptr [[TMP12]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -926,6 +974,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -935,21 +984,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -957,31 +1012,36 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// 
CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -991,112 +1051,122 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: 
call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i64 [[TMP14]], i64 [[TMP16]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP17]], ptr [[SVAR7]]), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK9-NEXT: store i64 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK9-NEXT: store ptr [[SVAR]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK9-NEXT: br i1 [[TMP38]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP39]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = 
getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done10: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP28]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store i32 [[TMP29]], ptr 
[[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP40]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP41:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP41]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP42]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP43]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done12: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1104,39 +1174,43 @@ // CHECK9-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR8:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK9-NEXT: 
store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1146,106 +1220,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], 
i32 [[TMP19]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // 
CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM10]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX11]], ptr align 4 [[TMP18]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP28]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, 
ptr [[TMP31]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK9-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN13]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE14:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// 
CHECK9-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN13]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE14]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done14: -// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN15:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN15]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN15]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], 
label [[ARRAYDESTROY_DONE16:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done16: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1263,10 +1337,10 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK9-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 @@ -1277,9 +1351,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull 
align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 8 @@ -1343,12 +1417,12 @@ // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1423,6 +1497,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr 
[[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1431,20 +1506,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1452,140 +1532,154 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr 
[[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i64 [[TMP13]], i64 [[TMP15]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP16]]), !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK9-NEXT: store i64 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// 
CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP31]]) +// CHECK9-NEXT: [[TMP32:%.*]] = 
load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK9-NEXT: br i1 [[TMP33]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP36]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = 
icmp eq ptr [[ARRAY_BEGIN4]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done9: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr 
[[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP37]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done5: +// CHECK9-NEXT: [[TMP38:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP38]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP39]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done11: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done7: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1593,143 +1687,147 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: 
[[T_VAR3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC4:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR5:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR6:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP7:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: 
[[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP13]] to i32 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP14]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: 
[[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) -// CHECK9-NEXT: store ptr [[VAR6]], ptr [[_TMP7]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: 
br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR3]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP25]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: 
[[TMP26:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP26]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK9-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK9-NEXT: 
call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK9-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK9-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC4]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP36:%.*]] = 
getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR5]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP7]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr 
[[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done9: +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP3]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR5]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN10]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done11: // CHECK9-NEXT: ret void // // @@ -1749,7 +1847,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1762,7 +1860,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1931,6 +2029,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1940,21 
+2039,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1962,31 +2067,36 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1996,110 +2106,120 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 
[[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 7, ptr @.omp_outlined..1, i32 [[TMP13]], i32 [[TMP14]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP15]], ptr [[SVAR7]]), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP6]] +// 
CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK11-NEXT: store ptr [[SVAR]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr 
[[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN9]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE10:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], 
i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN9]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE10]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done10: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP26]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN11]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done12: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2107,37 +2227,41 @@ // CHECK11-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// 
CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// 
CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -2147,104 +2271,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP19]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP20]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP21]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // 
CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP18]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: store i32 [[TMP26]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP29:%.*]] = load 
i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP29]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP28]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: 
[[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK11-NEXT: br i1 [[TMP36]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP27]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP37]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP38]] 
+// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP28]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP7]], ptr align 4 [[TMP29]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP30]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP38]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// 
CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP39:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP17]], ptr align 4 [[TMP39]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP40]], ptr [[TMP14]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2262,10 +2386,10 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK11-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 @@ -2276,9 +2400,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: store ptr [[TEST]], 
ptr [[VAR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VAR]], align 4 @@ -2342,12 +2466,12 @@ // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP31]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -2422,6 +2546,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -2430,20 +2555,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] 
= load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..2, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2451,138 +2581,152 @@ // CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], 
align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr 
[[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..3, i32 [[TMP12]], i32 [[TMP13]], ptr [[VEC3]], ptr [[T_VAR2]], ptr [[S_ARR4]], ptr [[TMP14]]), !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP21]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP15]] +// 
CHECK11-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP29]]) +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] 
= icmp ne i32 [[TMP30]], 0 +// CHECK11-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0 +// CHECK11-NEXT: br i1 [[TMP33]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN8]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE9:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN4]], [[TMP35]] +// 
CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE5:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN8]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP24]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE9]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done9: -// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP25]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP35]] 
+// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE5]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done5: +// CHECK11-NEXT: [[TMP36:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP36]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP26]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP37]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label 
[[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done11: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done7: // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -2590,139 +2734,143 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 
+// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], 
ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP17]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP18]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP18]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP25]] +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// 
CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP27]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP26]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 
[[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK11-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP35]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[TMP10]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label 
[[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP27]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP28:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP6]], ptr align 4 [[TMP28]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP36]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], 
label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP15]], ptr align 4 [[TMP37]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP38]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] 
-// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -2742,7 +2890,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -2755,7 +2903,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -222,71 +222,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK1-SAME: () #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) 
+// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -295,12 +308,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -310,43 +322,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -356,27 +372,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// 
CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK1-NEXT: unreachable // // @@ -392,82 +408,96 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// 
CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -476,12 +506,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -491,43 +520,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -537,27 +570,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: unreachable // // @@ -733,71 +766,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -806,12 +852,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -821,43 +866,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -867,98 +916,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -967,12 +1029,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -982,43 +1043,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1028,98 +1093,111 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: unreachable // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1128,12 +1206,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1143,43 +1220,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1189,27 +1270,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: unreachable // // @@ -1218,17 +1299,17 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK1-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK1-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK1-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK1-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK1-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -1239,70 +1320,84 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK1-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1311,12 +1406,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1326,43 +1420,47 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = 
load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: invoke void @_Z3foov() @@ -1372,27 +1470,27 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 100, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: -// CHECK1-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK1-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK1-NEXT: catch ptr null -// CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: unreachable // // @@ -1899,71 +1997,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l50 // CHECK5-SAME: () #[[ATTR3:[0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1972,12 +2083,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1987,43 +2097,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// 
CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2033,27 +2147,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// 
CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9:[0-9]+]], !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: unreachable // // @@ -2069,82 +2183,96 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// 
CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, i64 [[TMP2]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[TMP1]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: 
[[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2153,12 +2281,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2168,43 +2295,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2214,27 +2345,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: unreachable // // @@ -2401,71 +2532,84 @@ // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr 
[[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 5), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2474,12 +2618,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2489,43 +2632,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2535,98 +2682,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIcLi5EEiv_l40 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..6) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..7, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 23), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2635,12 +2795,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2650,43 +2809,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2696,98 +2859,111 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: unreachable // // // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainI1SLi1EEiv_l36 // CHECK5-SAME: () #[[ATTR3]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..8) +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..8, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 
// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..9, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP2]], i32 1), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK5-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..9, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2796,12 +2972,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..9 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2811,43 +2986,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = load 
i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -2857,27 +3036,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: unreachable // // @@ -2886,17 +3065,17 @@ // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 8 -// CHECK5-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 1 // CHECK5-NEXT: invoke void @_ZN1SC1El(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]], i64 noundef 23) // CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK5: invoke.cont: // CHECK5-NEXT: [[CALL:%.*]] = call noundef signext i8 @_ZN1ScvcEv(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK5-NEXT: call void @_ZN1SD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR6]] // CHECK5-NEXT: store i8 [[CALL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK5-NEXT: store i8 [[TMP0]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, i64 [[TMP1]]) +// CHECK5-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: store i8 [[TMP1]], ptr [[TMP0]], align 1 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -2907,70 +3086,84 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR3]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +// CHECK5-NEXT: store i8 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 
[[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..11, i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP11:%.*]] = sext i8 [[TMP10]] to i32 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB3]], i32 [[TMP4]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK5-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK5-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2979,12 +3172,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2994,43 +3186,47 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK5-NEXT: [[TMP1:%.*]] = 
load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK5-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 99 +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 99 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: 
[[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: invoke void @_Z3foov() @@ -3040,27 +3236,27 @@ // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK5-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK5-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 100, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void // CHECK5: terminate.lpad: -// CHECK5-NEXT: [[TMP13:%.*]] = landingpad { ptr, i32 } +// CHECK5-NEXT: [[TMP18:%.*]] = landingpad { ptr, i32 } // CHECK5-NEXT: catch ptr null -// CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { ptr, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP14]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = extractvalue { ptr, i32 } [[TMP18]], 0 +// CHECK5-NEXT: call void @__clang_call_terminate(ptr [[TMP19]]) #[[ATTR9]], !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp --- 
a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -296,29 +296,36 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK1-NEXT: [[SIVAR:%.*]] = 
alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -334,62 +341,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // 
CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: 
store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -399,12 +412,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -419,14 +431,18 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -442,75 +458,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label 
[[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, 
!llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM3]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -523,19 +539,19 @@ // CHECK1-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK1-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // 
CHECK1-NEXT: store ptr [[TEST]], ptr [[VAR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 @@ -566,12 +582,12 @@ // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -607,15 +623,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -623,88 +641,99 @@ // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: 
.omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -713,12 +742,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -728,104 +756,108 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store ptr undef, ptr 
[[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK1-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK1: arrayctor.loop: // CHECK1-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK1-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK1-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label 
[[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], 
ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store 
i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP12]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 [[IDXPROM5]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP17]], i64 4, i1 false), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// 
CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] // CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] @@ -849,7 +881,7 @@ // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK1-NEXT: ret void @@ -863,7 +895,7 @@ // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1048,29 +1080,36 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l98 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr 
[[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1086,60 +1125,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: 
store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1149,12 +1194,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1169,14 +1213,18 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 @@ -1190,73 
+1238,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store 
i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP17]]) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP20]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP25]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // 
CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1269,19 +1317,19 @@ // CHECK3-SAME: () #[[ATTR6:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 // CHECK3-NEXT: [[VAR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK3-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK3-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // 
CHECK3-NEXT: store ptr [[TEST]], ptr [[VAR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 @@ -1312,12 +1360,12 @@ // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1353,15 +1401,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1369,86 +1419,97 @@ // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i32 [[TMP7]], i32 [[TMP8]]), !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // 
CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN4]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1457,12 +1518,11 @@ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr 
noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1472,100 +1532,104 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // 
CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK3-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK3: arrayctor.loop: // CHECK3-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK3-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK3-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 
4 [[ARRAYIDX4]], ptr align 4 [[TMP12]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 [[TMP18]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP17]], i32 4, i1 false), !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 +// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP24]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void 
@_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] // CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1589,7 +1653,7 @@ // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK3-NEXT: ret void @@ -1603,7 +1667,7 @@ // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -2516,17 +2580,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2534,61 +2600,72 @@ // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr 
[[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2597,12 +2674,11 @@ // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2615,77 +2691,81 @@ // CHECK9-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // 
CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP8]] 
-// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP3]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group 
[[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -118,71 +118,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l36 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] { // 
CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 
// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 4), !llvm.access.group !6 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..1, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !6 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 4), !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -191,12 +204,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr 
noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -206,60 +218,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -270,71 +286,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l39 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..2) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// 
CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 3), !llvm.access.group !15 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..3, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 3), !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -343,12 +372,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -358,60 +386,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -455,71 +487,84 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l29 // CHECK1-SAME: () #[[ATTR1]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @.omp_outlined..4) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], 
align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 999 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// 
CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !21 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..5, i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !21 +// CHECK1-NEXT: call void @__kmpc_push_proc_bind(ptr @[[GLOB3]], i32 [[TMP2]], i32 2), !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..5, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -528,12 +573,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -543,60 +587,64 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK1-NEXT: 
store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 999, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP6]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 999 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 999 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 999, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 1000, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -632,22 +680,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP2:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 @@ -657,22 +705,22 @@ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV5]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK3: omp.inner.for.cond7: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group 
[[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB4]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK3: omp.inner.for.body9: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD11]], ptr [[I6]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK3: omp.body.continue12: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK3: omp.inner.for.inc13: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV5]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end15: // CHECK3-NEXT: store i32 1000, ptr [[I6]], align 4 @@ -694,22 +742,22 @@ // CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: store i32 1000, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -112,34 +112,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 // CHECK1-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: 
store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: 
store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -152,204 +152,222 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !5 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// 
CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[CMP3:%.*]] = 
icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr 
[[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -363,15 +381,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -383,15 +401,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -407,41 +425,41 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = 
load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -451,204 +469,222 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i64 [[TMP9]], i64 [[TMP11]], ptr [[T_VAR1]]), !llvm.access.group !14 +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 
-// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 
[[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP8]], [[TMP9]] -// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[T_VAR2]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 
-// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR2]], ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK1-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr 
[[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -662,15 +698,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// 
CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -682,15 +718,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -716,34 +752,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, 
i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 
[[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 
[[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l70(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -756,200 +792,218 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[SIVAR1]]), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// 
CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 
[[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 
@__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -963,15 +1017,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// 
CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -983,15 +1037,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 
0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1007,41 +1061,41 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB4]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -1051,200 +1105,218 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr 
[[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] 
+// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..4, i32 [[TMP8]], i32 [[TMP9]], ptr [[T_VAR1]]), !llvm.access.group !15 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 
-// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.6, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 
[[TMP20]] monotonic, align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP23]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], 
align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 
[[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) -// CHECK3-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = call i32 
@__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.5, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: switch i32 [[TMP23]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP22]] monotonic, align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP26]] monotonic, align 4 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -1258,15 +1330,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// 
CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1278,15 +1350,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 
0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -1315,26 +1387,26 @@ // CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// 
CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 @@ -1361,37 +1433,37 @@ // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 
-// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK5-NEXT: ret i32 0 // @@ -1414,26 +1486,26 @@ // CHECK7-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = 
add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -1460,37 +1532,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], 
align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr 
[[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1509,208 +1581,226 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br 
label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP4]] // 
CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[SIVAR1]]), !llvm.access.group !4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB4]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// 
CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP19]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP22]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = atomicrmw add ptr [[TMP0]], 
i32 [[TMP22]] monotonic, align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP25]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[SIVAR2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 +// CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: 
store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK9-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 
-// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[SIVAR2]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP13]], align 8, !llvm.access.group !8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !8 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR2]], ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) -// CHECK9-NEXT: switch i32 [[TMP20]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP10]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: switch i32 [[TMP24]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr @[[GLOB3]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP6]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce_nowait(ptr 
@[[GLOB3]], i32 [[TMP10]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[SIVAR2]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP23]] monotonic, align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = atomicrmw add ptr [[TMP6]], i32 [[TMP27]] monotonic, align 4 // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -1724,15 +1814,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], 
align 4 // CHECK9-NEXT: ret void // // @@ -1744,15 +1834,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -437,75 +437,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -514,13 +530,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -530,67 +544,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: 
cond.false: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -602,75 +620,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -679,13 +713,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -695,67 +727,71 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -767,75 +803,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca 
i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -844,13 +896,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -860,88 +910,92 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -953,75 +1007,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1030,13 +1100,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1046,55 +1114,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// 
CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr 
[[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1102,9 +1174,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1116,75 +1188,91 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK1-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1193,13 +1281,11 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1209,55 +1295,59 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// 
CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr 
[[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: @@ -1265,9 +1355,9 @@ // CHECK1: omp.dispatch.inc: // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1501,75 +1591,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] 
= alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], 
align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP8]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1578,13 +1684,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1594,67 +1698,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: 
cond.false: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1666,75 +1774,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1743,13 +1867,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1759,67 +1881,71 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// 
CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP20]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1831,75 +1957,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = 
alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = 
load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1908,13 +2050,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1924,88 +2064,92 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[CONV2]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// 
CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP12]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[CONV2]] // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP13]] to i32 // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// 
CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK2-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2017,75 +2161,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 
8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2094,13 +2254,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2110,55 +2268,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2166,9 +2328,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr 
[[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2180,75 +2342,91 @@ // CHECK2-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP10:%.*]] = load 
i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK2-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr [[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2257,13 +2435,11 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef 
[[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2273,55 +2449,59 @@ // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 8 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK2-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) -// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK2: omp.dispatch.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: @@ -2329,9 +2509,9 @@ // CHECK2: omp.dispatch.inc: // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK2-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: // CHECK2-NEXT: store i32 123, ptr 
[[I]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2565,73 +2745,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 
[[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2640,13 +2836,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2656,64 +2850,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, 
ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2725,73 +2923,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2800,13 +3014,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2816,64 +3028,68 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 
4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK5-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2885,73 +3101,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2960,13 +3192,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2976,83 +3206,87 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP27:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK5-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK5-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK5-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3064,73 +3298,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3139,13 +3389,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3155,52 +3403,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 
@__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3208,9 +3460,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3222,73 +3474,89 @@ // CHECK5-SAME: (ptr noundef [[THIS:%.*]]) 
#[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK5-NEXT: ret void // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK5: cond.true: // CHECK5-NEXT: 
br label [[COND_END:%.*]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: br label [[COND_END]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK5-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK5-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3297,13 +3565,11 @@ // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK5-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3313,52 +3579,56 @@ // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 
@__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK5-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: @@ -3366,9 +3636,9 @@ // CHECK5: omp.dispatch.inc: // CHECK5-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK5-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3602,73 +3872,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) 
#[[ATTR1:[0-9]+]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // 
CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..1, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP9]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3677,13 +3963,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3693,64 +3977,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, 
ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3762,73 +4050,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..3, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP18]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3837,13 +4141,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3853,64 +4155,68 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 
4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP17]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK6-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK6-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3922,73 +4228,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..7, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP24]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3997,13 +4319,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4013,83 +4333,87 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP7]], [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP27:![0-9]+]] +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP21]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK6-NEXT: store i32 [[ADD3]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] // CHECK6-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] // CHECK6-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK6-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK6-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // 
CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4101,73 +4425,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 
122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..11, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP30]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP30]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP30]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4176,13 +4516,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4192,52 +4530,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = call 
i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4245,9 +4587,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4259,73 +4601,89 @@ // CHECK6-SAME: (ptr noundef [[THIS:%.*]]) 
#[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[TMP0]]) +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK6-NEXT: ret void // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: 
[[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK6-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK6: cond.true: // CHECK6-NEXT: 
br label [[COND_END:%.*]] // CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: br label [[COND_END]] // CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK6-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..15, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK6-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK6-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP36]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK6-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4334,13 +4692,11 @@ // // // CHECK6-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef 
[[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK6-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK6-NEXT: entry: // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK6-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4350,52 +4706,56 @@ // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK6-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK6-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 61) +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK6-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 61) // CHECK6-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK6: omp.dispatch.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP13:%.*]] = 
call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK6: omp.dispatch.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP18]] // CHECK6-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK6-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: @@ -4403,9 +4763,9 @@ // CHECK6: omp.dispatch.inc: // CHECK6-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK6: omp.dispatch.end: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK6-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: // CHECK6-NEXT: store i32 123, ptr [[I]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -5221,23 +5581,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, 
align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5247,81 +5612,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: 
store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] 
// CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5334,15 +5716,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5355,90 +5733,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], 
ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 
[[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5456,23 +5838,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5482,81 +5869,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -5569,15 +5973,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5590,90 +5990,94 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP23]], ptr 
[[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK13-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 
[[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -5693,7 +6097,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -5702,22 +6106,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 
+// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5727,111 +6135,129 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], 
[ [[TMP13]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP22:%.*]] 
= load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK13-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK13-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, 
!llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK13-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK13-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK13-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK13: cond.true11: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13:%.*]] // CHECK13: cond.false12: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[COND_END13]] // CHECK13: cond.end13: -// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], 
[[COND_FALSE12]] ] +// CHECK13-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK13-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK13-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK13-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK13-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK13-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK13-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK13-NEXT: [[TMP49:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK13-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK13-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -5844,16 +6270,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -5866,91 +6288,97 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], 
align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK13-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// 
CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
-// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK13-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -5968,23 +6396,28 @@ // CHECK13-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -5994,81 +6427,98 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK13-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK13-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -6081,15 +6531,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -6102,73 +6548,77 @@ // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = 
load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6176,12 +6626,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -6201,7 +6651,7 @@ // CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // 
CHECK13-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -6210,22 +6660,26 @@ // CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6235,86 +6689,104 @@ // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 
[[TMP20]], [[TMP21]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]), !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK13-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK13-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], 
i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK13-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK13-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK13-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK13-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: 
.omp.final.then: -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK13-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK13-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -6327,16 +6799,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -6349,75 +6817,81 @@ // CHECK13-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// 
CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK13-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK13-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK13: omp.precond.then: // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK13-NEXT: [[TMP18:%.*]] = load 
i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK13-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// 
CHECK13-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// 
CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -6425,12 +6899,12 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK13-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK13-NEXT: [[TMP35:%.*]] 
= load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK13-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK13-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -6667,75 +7141,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6744,13 +7234,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6760,66 +7248,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] 
= icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6831,75 +7323,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6908,13 +7416,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -6924,66 +7430,70 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK13-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK13-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] 
= icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -6997,88 +7507,104 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: 
cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP58]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: store i32 [[TMP20]], ptr 
[[TMP19]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7087,14 +7613,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..27 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7104,89 +7628,95 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 
1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: -// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK13-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK13-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK13-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK13-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK13-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7198,75 +7728,91 @@ // CHECK13-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK13-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7275,13 +7821,11 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7291,54 +7835,58 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK13-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP7:%.*]] = call i32 
@__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK13-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK13-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7346,9 +7894,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK13-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK13-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7362,88 +7910,104 @@ // CHECK13-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK13-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK13-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK13-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK13-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK13-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK13: cond.true: // CHECK13-NEXT: br label [[COND_END:%.*]] // CHECK13: 
cond.false: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: br label [[COND_END]] // CHECK13: cond.end: -// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK13-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK13-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP70]] -// CHECK13-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK13-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK13-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK13-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: store i32 [[TMP20]], ptr 
[[TMP19]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: -// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK13-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7452,14 +8016,12 @@ // // // CHECK13-LABEL: define {{[^@]+}}@.omp_outlined..35 
-// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK13-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK13-NEXT: entry: // CHECK13-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK13-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK13-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK13-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -7469,56 +8031,62 @@ // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK13-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK13-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK13-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK13-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK13-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK13-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK13-NEXT: call void 
@__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK13-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK13-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK13: omp.dispatch.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK13-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK13: omp.dispatch.body: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK13-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK13-NEXT: store i32 [[ADD2]], 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: @@ -7526,9 +8094,9 @@ // CHECK13: omp.dispatch.inc: // CHECK13-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK13: omp.dispatch.end: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK13-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK13-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK13: .omp.final.then: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 // CHECK13-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -7959,23 +8527,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -7985,81 +8558,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// 
CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP13]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// 
CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8072,15 +8662,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, 
align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8093,90 +8679,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, 
ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to 
i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: 
br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17:![0-9]+]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 
-// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP17]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8194,23 +8784,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8220,81 +8815,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP22]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: 
.omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8307,15 +8919,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8328,90 +8936,94 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], ptr 
[[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK14-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 
[[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -8431,7 +9043,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -8440,22 +9052,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 
+// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8465,111 +9081,129 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], 
align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], 
[ [[TMP13]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP22:%.*]] 
= load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i64 [[TMP18]], i64 [[TMP20]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP22]]), !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i64 +// CHECK14-NEXT: store i64 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP26:%.*]] = zext i32 [[TMP25]] to i64 +// CHECK14-NEXT: store i64 [[TMP26]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP31]], align 8, 
!llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: store i32 [[TMP33]], ptr [[TMP32]], align 8, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] // CHECK14-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] +// CHECK14-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] // CHECK14-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK14: cond.true11: -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[COND_END13:%.*]] // CHECK14: cond.false12: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[COND_END13]] // CHECK14: cond.end13: -// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE11]] ], [ [[TMP32]], 
[[COND_FALSE12]] ] +// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] // CHECK14-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK14-NEXT: store i32 [[TMP33]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK14-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP35]]) -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP37:%.*]] = icmp ne i32 [[TMP36]], 0 -// CHECK14-NEXT: br i1 [[TMP37]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP46]]) +// CHECK14-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 +// CHECK14-NEXT: br i1 [[TMP48]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP38]], 0 +// CHECK14-NEXT: [[TMP49:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP49]], 0 // CHECK14-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK14-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -8582,16 +9216,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8604,91 +9234,97 @@ // CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load 
i32, ptr [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], 
align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK14-NEXT: 
store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP29]] to i64 +// 
CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 
-// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1 // CHECK14-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] @@ -8706,23 +9342,28 @@ // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8732,81 +9373,98 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +// CHECK14-NEXT: store i64 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP34]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK14-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: 
.omp.final.then: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -8819,15 +9477,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -8840,73 +9494,77 @@ // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load 
ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK14-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP28]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// 
CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK14-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -8914,12 +9572,12 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK14-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -8939,7 +9597,7 @@ // CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr 
[[M_ADDR]], align 8 // CHECK14-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -8948,22 +9606,26 @@ // CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -8973,86 +9635,104 @@ // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 
[[TMP20]], [[TMP21]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i64 [[TMP17]], i64 [[TMP19]], ptr [[TMP0]], i64 [[TMP1]], ptr [[TMP2]], i64 [[TMP21]]), !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 +// CHECK14-NEXT: store i64 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +// CHECK14-NEXT: store i64 [[TMP25]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], 
i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP27]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: store i64 [[TMP4]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK14-NEXT: store ptr [[TMP6]], ptr [[TMP30]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: store i32 [[TMP32]], ptr [[TMP31]], align 8, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP40]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK14-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP35:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP36]]) +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK14-NEXT: br i1 [[TMP38]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: 
.omp.final.then: -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP39]], 0 // CHECK14-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -9065,16 +9745,12 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -9087,75 +9763,81 @@ // CHECK14-NEXT: [[I5:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store 
ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK14-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 4 +// 
CHECK14-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 5 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK14-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK14: omp.precond.then: // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP18:%.*]] = load 
i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP18]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK14-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], 
align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43:![0-9]+]] +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group 
[[ACC_GRP43]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP31]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP43]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -9163,12 +9845,12 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK14-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// 
CHECK14-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK14-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -9405,75 +10087,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9482,13 +10180,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9498,66 +10194,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
// CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] 
= icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9569,75 +10269,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9646,13 +10362,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9662,66 +10376,70 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_15]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK14-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], 
label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
// CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK14-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK14-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] 
= icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9735,88 +10453,104 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: 
cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP58]] -// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: store i32 [[TMP20]], ptr 
[[TMP19]], align 8, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP58]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9825,14 +10559,12 @@ // // // CHECK14-LABEL: define 
{{[^@]+}}@.omp_outlined..27 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -9842,89 +10574,95 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP7]] to i32 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[CONV2]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP15]] to i32 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[CONV2]] // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: -// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP16]] to i32 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[CONV3]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// 
CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61:![0-9]+]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK14-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP61]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK14-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK14-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK14-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK14-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -9936,75 +10674,91 @@ // CHECK14-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_20:%.*]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_21:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_20:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 
9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64:![0-9]+]] +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: store i64 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP64]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP64]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK14-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10013,13 +10767,11 @@ // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10029,54 +10781,58 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_21]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP7]] to i32 +// CHECK14-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP8]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP7:%.*]] = call i32 
@__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK14-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]] +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK14-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP67]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -10084,9 +10840,9 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK14-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK14-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10100,88 +10856,104 @@ // CHECK14-NEXT: [[M_ADDR:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_22:%.*]], align 8 // CHECK14-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i64 [[TMP3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_24:%.*]], align 8 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 
-// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_22:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_22]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK14-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: cond.true: // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: 
cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK14-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70:![0-9]+]] +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group 
[[ACC_GRP70]] -// CHECK14-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i64 [[TMP9]], i64 [[TMP11]], ptr [[TMP0]], i64 [[TMP13]]), !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 +// CHECK14-NEXT: store i64 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: store i64 [[TMP15]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK14-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK14-NEXT: store ptr [[TMP2]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: store i32 [[TMP20]], ptr 
[[TMP19]], align 8, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP70]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP70]] +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP70]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK14-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10190,14 +10962,12 @@ // // // CHECK14-LABEL: define 
{{[^@]+}}@.omp_outlined..35 -// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK14-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -10207,56 +10977,62 @@ // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK14-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] 
= getelementptr inbounds [[STRUCT_ANON_24:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_24]], ptr [[TMP0]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK14-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK14-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK14-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK14-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK14-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP10]] to i32 // CHECK14-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK14-NEXT: 
call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK14-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK14-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK14: omp.dispatch.body: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// 
CHECK14-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]] +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK14-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP22]], 1 // 
CHECK14-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP73]] // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK14: omp.inner.for.end: @@ -10264,9 +11040,9 @@ // CHECK14: omp.dispatch.inc: // CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK14-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK14-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK14: .omp.final.then: // CHECK14-NEXT: store i32 10, ptr [[I]], align 4 // CHECK14-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -10701,23 +11477,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10727,79 +11508,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 
[[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 
[[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 
+// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -10812,15 +11610,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10833,87 +11627,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: 
store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // 
CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -10931,23 +11729,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -10957,79 +11760,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: 
.omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11042,15 +11862,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11063,87 +11879,91 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// 
CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -11163,7 +11983,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11172,22 +11992,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11197,109 +12021,127 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK17-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// 
CHECK17-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK17-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK17-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK17-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK17-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: -// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK17-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: [[TMP31:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK17-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK17-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK17-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK17-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK17-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK17-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK17-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK17-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -11312,16 +12154,12 
@@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11334,88 +12172,94 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 
+// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK17-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK17-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -11433,23 +12277,28 @@ // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11459,79 +12308,96 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK17-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: 
.omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -11544,15 +12410,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -11565,70 +12427,74 @@ // CHECK17-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 
-// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 35, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP38]] +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11636,12 +12502,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK17-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK17-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -11661,7 +12527,7 @@ // CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -11670,22 +12536,26 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11695,84 +12565,102 @@ // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 
[[TMP20]], [[TMP21]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]), !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: [[TMP22:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK17-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK17-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK17-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -11785,16 +12673,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: 
entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -11807,72 +12691,78 @@ // CHECK17-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK17-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK17-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK17-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK17-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // 
CHECK17-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK17: omp.precond.then: // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 35, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load 
i32, ptr [[TMP22]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 35, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK17-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK17-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -11880,12 +12770,12 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: 
omp.dispatch.end: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK17-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK17-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK17-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK17-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -12122,73 +13012,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12197,13 +13103,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12213,63 +13117,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// 
CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12281,73 +13189,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP53]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12356,13 +13280,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12372,63 +13294,67 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// 
CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK17-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK17-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12442,86 +13368,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] 
-// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12530,14 +13472,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12547,84 +13487,90 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 
[[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK17-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK17-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK17-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK17-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK17-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12636,73 +13582,89 @@ // CHECK17-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK17-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12711,13 +13673,11 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12727,51 +13687,55 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 35, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 35, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK17-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK17-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12779,9 +13743,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12795,86 +13759,102 @@ // CHECK17-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // 
CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK17-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: 
cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] 
-// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK17-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -12883,14 +13863,12 @@ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK17-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -12900,53 +13878,59 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK17-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 35, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK17-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 35, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK17-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK17: omp.dispatch.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK17-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK17-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK17: omp.dispatch.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// 
CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK17-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK17-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK17: omp.inner.for.end: @@ -12954,9 +13938,9 @@ // CHECK17: omp.dispatch.inc: // CHECK17-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK17: omp.dispatch.end: -// 
CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK17-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK17-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 10, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -13391,23 +14375,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13417,79 +14406,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 
4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: 
omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..1, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: 
.omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13502,15 +14508,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13523,87 +14525,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// 
CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13621,23 +14627,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, 
align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..2, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13647,79 +14658,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..3, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP23]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP23]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: 
.omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -13732,15 +14760,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -13753,87 +14777,91 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr 
[[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP18]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// 
CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26:![0-9]+]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP24]], [[TMP25]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP26]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP27]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] 
= load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP28]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP26]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -13853,7 +14881,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca 
i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -13862,22 +14890,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..6, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..6, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -13887,109 +14919,127 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK19-NEXT: [[TMP21:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP21]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..7, i32 [[TMP17]], i32 [[TMP18]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP20]]), !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP29]] +// 
CHECK19-NEXT: store i32 [[TMP31]], ptr [[TMP30]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK19-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK19-NEXT: 
[[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK19-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] +// CHECK19-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: -// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] +// CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE11]] ], [ [[TMP41]], [[COND_FALSE12]] ] // CHECK19-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: [[TMP31:%.*]] = 
load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] -// CHECK19-NEXT: store i32 [[TMP31]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP29]] +// CHECK19-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP29]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 -// CHECK19-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP44]]) +// CHECK19-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK19-NEXT: br i1 [[TMP46]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP36]], 0 +// CHECK19-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP47]], 0 // CHECK19-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 // CHECK19-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] @@ -14002,16 +15052,12 
@@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14024,88 +15070,94 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], 
ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr 
[[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP20]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP23]], [[COND_TRUE]] ], [ [[TMP24]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP28]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP19]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP29]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 
+// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP30]], 1 // CHECK19-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 // CHECK19-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] @@ -14123,23 +15175,28 @@ // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..10, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14149,79 +15206,96 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], 
align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // 
CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 5, ptr @.omp_outlined..11, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]), !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: store i32 [[TMP21]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..11, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP21]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP30]]) +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK19-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: 
.omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL]] @@ -14234,15 +15308,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..11 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -14255,70 +15325,74 @@ // CHECK19-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 
[[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP13]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 
-// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP20]], i32 1073741859, i32 [[TMP17]], i32 [[TMP18]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP14]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP22]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP23]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP25]], [[TMP26]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP27]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP28]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP38]] +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14326,12 +15400,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP24]], 0 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK19-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 // CHECK19-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]] @@ -14351,7 +15425,7 @@ // CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -14360,22 +15434,26 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..14, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..14, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..14 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14385,84 +15463,102 @@ // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // 
CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // 
CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 
[[TMP20]], [[TMP21]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 6, ptr @.omp_outlined..15, i32 [[TMP16]], i32 [[TMP17]], ptr [[TMP0]], i32 [[TMP1]], ptr [[TMP2]], i32 [[TMP19]]), !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP22]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP23]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 
0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store ptr [[TMP6]], ptr [[TMP28]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: store i32 [[TMP30]], ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..15, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: [[TMP22:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]]) -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0 -// CHECK19-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP34]]) +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP36:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK19-NEXT: br i1 [[TMP36]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP26]], 0 +// CHECK19-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP37]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL]] @@ -14475,16 +15571,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..15 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: 
entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -14497,72 +15589,78 @@ // CHECK19-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK19-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP14]], 0 // CHECK19-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK19-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK19-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK19-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // 
CHECK19-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK19: omp.precond.then: // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP13]], i32 1073741859, i32 [[TMP10]], i32 [[TMP11]], i32 1, i32 [[TMP9]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] 
= load i32, ptr [[TMP22]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP23]], i32 1073741859, i32 [[TMP20]], i32 [[TMP21]], i32 1, i32 [[TMP19]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP25]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP30]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP21]] +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP31]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 // CHECK19-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -14570,12 +15668,12 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label 
[[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK19-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 +// CHECK19-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP35]], 0 // CHECK19-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK19-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -14812,73 +15910,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..18, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..18 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..19, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..19, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP47]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP47]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14887,13 +16001,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..19 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -14903,63 +16015,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_10]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// 
CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -14971,73 +16087,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..22, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..22 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: 
[[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, 
ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
[[ACC_GRP53]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..23, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..23, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP53]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP53]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15046,13 +16178,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..23 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15062,63 +16192,67 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_12]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP10]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56:![0-9]+]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// 
CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP17]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP56]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK19-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP10]]) +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK19-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // 
CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15132,86 +16266,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..26, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..26, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..26 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] 
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..27, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..27, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP59]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP59]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15220,14 +16370,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..27 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15237,84 +16385,90 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP5]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP4]], align 4 // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 
[[TMP18]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62:![0-9]+]] +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[TMP6]], i32 0, i32 [[TMP24]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] -// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] +// CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP25]], 1 // CHECK19-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP62]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] // CHECK19-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP5]]) -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK19-NEXT: br i1 [[TMP23]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = icmp ne i32 [[TMP30]], 0 +// CHECK19-NEXT: br i1 [[TMP31]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15326,73 +16480,89 @@ // CHECK19-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..30, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..30 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store 
i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 3, ptr @.omp_outlined..31, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]]), !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP12]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..31, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP65]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP65]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK19-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15401,13 +16571,11 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..31 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15417,51 +16585,55 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_16]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP6]], i32 1073741859, i32 [[TMP3]], i32 [[TMP4]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 1073741859, i32 [[TMP9]], i32 [[TMP10]], i32 1, i32 1) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP6]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], 
ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP7]], 0 +// CHECK19-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP12]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP13]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68:![0-9]+]] +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP12]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP18]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP68]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15469,9 +16641,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 +// CHECK19-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15485,86 +16657,102 @@ // CHECK19-NEXT: [[M_ADDR:%.*]] = alloca i32, align 4 // 
CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_17:%.*]], align 4 // CHECK19-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[M_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @.omp_outlined..34, ptr [[TMP0]], i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..34, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..34 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_LB:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[DOTCAPTURED_OMP_PREVIOUS_UB:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_18:%.*]], align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 
-// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: 
cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71:![0-9]+]] +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] 
-// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 4, ptr @.omp_outlined..35, i32 [[TMP8]], i32 [[TMP9]], ptr [[TMP0]], i32 [[TMP11]]), !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_LB]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[DOTCAPTURED_OMP_PREVIOUS_UB]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP2]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..35, ptr [[OMP_OUTLINED_ARG_AGG_]]), !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] -// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP71]] +// CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP71]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK19-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -15573,14 +16761,12 @@ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..35 -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef 
[[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -15590,53 +16776,59 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP7]], i32 1073741859, i32 [[TMP4]], i32 [[TMP5]], i32 1, i32 [[TMP3]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB3]], i32 [[TMP15]], i32 1073741859, i32 [[TMP12]], i32 [[TMP13]], i32 1, i32 [[TMP11]]) // CHECK19-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK19: omp.dispatch.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP7]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) -// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP8]], 0 +// CHECK19-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB3]], i32 [[TMP15]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]]) +// CHECK19-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP16]], 0 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK19: omp.dispatch.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74:![0-9]+]] +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP74]] +// 
CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK19-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] -// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] +// CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK19-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP74]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: @@ -15644,9 +16836,9 @@ // CHECK19: omp.dispatch.inc: // CHECK19-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK19: omp.dispatch.end: -// 
CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK19-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK19-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 10, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp @@ -274,15 +274,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -297,6 +299,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -312,68 +316,68 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -470,15 +474,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -494,6 +500,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -511,65 +519,65 @@ // CHECK1: arrayctor.cont: 
// CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP12]] to 
i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ 
[[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -793,15 +801,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l93 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -816,6 +826,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -831,66 +843,66 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -987,15 +999,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1011,6 +1025,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1028,63 +1044,63 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) 
// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 
4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4 +// 
CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP15]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP16]], [[OMP_LOOP_EXIT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -1284,17 +1300,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -1310,63 +1328,65 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 // CHECK9-NEXT: store i32 1, ptr [[G]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4 // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8 -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp @@ -97,34 +97,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -137,18 +137,21 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -159,74 +162,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: 
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label 
[[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -240,15 +245,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -264,41 +269,41 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: 
store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP18]], align 8 
-// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], 
align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -308,18 +313,21 @@ // CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -330,74 +338,76 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call 
void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -411,15 +421,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr 
[[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -445,34 +455,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -485,18 +495,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) 
#[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -507,74 +520,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// 
CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // 
CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: 
[[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -588,15 +603,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -612,41 +627,41 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// 
CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, 
ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr 
@.offload_sizes.3, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -656,18 +671,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -678,74 +696,76 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: 
[[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP15]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], 
ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP18]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP18]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -759,15 +779,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -792,18 +812,21 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -815,77 +838,79 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9: omp.inner.for.body: +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8 // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]) // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP16]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr 
@.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP19]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP19]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -899,15 +924,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], 
i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -386,6 +386,7 @@ // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK1-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK1-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -396,112 +397,118 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[I_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] 
= alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 0 +// CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: 
[[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK1-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK1-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -514,20 +521,24 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -540,80 +551,82 @@ // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK1-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK1-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -807,6 +820,7 @@ // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK3-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -817,111 +831,117 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK3-NEXT: call void 
@__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[I_ADDR]], ptr [[N_ADDR]], ptr [[TMP1]]) +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[I_ADDR]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP4]], align 4 
+// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: store i32 0, ptr [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: -// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP2]], i32 0, i32 [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP6]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK3-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP23]], 0 -// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 +// CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 +// CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[TMP2]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: br label [[OMP_PRECOND_END]] @@ -934,20 +954,24 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined..1, ptr [[N_ADDR]], ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[N_ADDR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[A:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -960,79 +984,81 @@ // CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP8:%.*]] 
= load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP10]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP10]], [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP13]], [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr 
[[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP1]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK3-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP22]], 0 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK3-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1417,23 +1443,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1446,82 +1477,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1638,23 +1671,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1667,81 +1705,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] 
+// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1998,18 +2038,21 @@ // CHECK17-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK17-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2019,62 +2062,64 @@ // CHECK17-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK17-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK17: cond.true: // CHECK17-NEXT: br label [[COND_END:%.*]] // CHECK17: cond.false: -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: br label [[COND_END]] // CHECK17: cond.end: -// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK17-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK17-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP4:![0-9]+]] +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP9:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP11:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK17-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group 
[[ACC_GRP4]] // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK17-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: -// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK17-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK17-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK17-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK17: .omp.final.then: // CHECK17-NEXT: store i32 123, ptr [[I]], align 4 // CHECK17-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2175,18 +2220,21 @@ // CHECK19-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK19-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2196,61 +2244,63 @@ // CHECK19-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK19-NEXT: store 
i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK19-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK19: cond.true: // CHECK19-NEXT: br label [[COND_END:%.*]] // CHECK19: cond.false: -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK19-NEXT: br label [[COND_END]] // CHECK19: cond.end: -// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK19-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK19-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 
// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP9:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP10]] +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP11:%.*]] = load float, ptr [[B]], align 4, !llvm.access.group [[ACC_GRP5]] +// 
CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP11]] to i32 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP12]] // CHECK19-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK19-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: -// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK19-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK19-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK19-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK19: .omp.final.then: // CHECK19-NEXT: store i32 123, ptr [[I]], align 4 // CHECK19-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2352,7 
+2402,7 @@ // CHECK21-NEXT: entry: // CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 @@ -2360,22 +2410,24 @@ // CHECK21-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK21-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK21-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK21-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP3]]) +// CHECK21-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP3]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK21-NEXT: ret void // // // CHECK21-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK21-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK21-NEXT: entry: // CHECK21-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK21-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK21-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK21-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK21-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2385,99 +2437,105 @@ // CHECK21-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK21-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK21-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// 
CHECK21-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK21-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK21-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK21: cond.true: // CHECK21-NEXT: br label [[COND_END:%.*]] // CHECK21: cond.false: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK21-NEXT: br label [[COND_END]] // CHECK21: cond.end: -// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK21-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK21-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr 
[[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK21-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: 
[[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP10:%.*]] = load float, ptr [[B]], align 4, !nontemporal !5, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[CONV:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP14:%.*]] = load float, ptr [[B]], align 4, !nontemporal !5, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK21-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK21-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP6:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] -// CHECK21: omp.inner.for.cond3: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK21-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END16:%.*]] -// CHECK21: omp.inner.for.body5: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK21-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 -// CHECK21-NEXT: [[B8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP16:%.*]] = load float, ptr [[B8]], align 4 -// CHECK21-NEXT: [[CONV9:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK21-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK21-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], ptr [[A10]], i64 0, i64 [[IDXPROM11]] -// CHECK21-NEXT: store i32 [[CONV9]], ptr [[ARRAYIDX12]], align 4 -// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE13:%.*]] -// CHECK21: omp.body.continue13: -// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC14:%.*]] -// CHECK21: omp.inner.for.inc14: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK21-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK21: omp.inner.for.end16: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] +// CHECK21: 
omp.inner.for.cond4: +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END17:%.*]] +// CHECK21: omp.inner.for.body6: +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK21-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK21-NEXT: [[TMP20:%.*]] = load float, ptr [[B9]], align 4 +// CHECK21-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK21-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK21-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK21-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i64 0, i64 [[IDXPROM12]] +// CHECK21-NEXT: store i32 [[CONV10]], ptr [[ARRAYIDX13]], align 4 +// CHECK21-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] +// CHECK21: omp.body.continue14: +// CHECK21-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] +// CHECK21: omp.inner.for.inc15: +// CHECK21-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK21-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK21: omp.inner.for.end17: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: -// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK21-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK21-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK21-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK21: .omp.final.then: // CHECK21-NEXT: store i32 123, ptr [[I]], align 4 // CHECK21-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2579,7 +2637,7 @@ // CHECK23-NEXT: entry: // CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 @@ -2587,22 +2645,24 @@ // CHECK23-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP2]] to i1 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK23-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP4]] to i1 // 
CHECK23-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK23-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP3]]) +// CHECK23-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP3]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK23-NEXT: ret void // // // CHECK23-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] { +// CHECK23-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK23-NEXT: entry: // CHECK23-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK23-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK23-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK23-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK23-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2612,97 +2672,103 @@ // CHECK23-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK23-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK23-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK23-NEXT: [[TMP0:%.*]] 
= load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 +// CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK23-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 122 // CHECK23-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK23: 
cond.true: // CHECK23-NEXT: br label [[COND_END:%.*]] // CHECK23: cond.false: -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK23-NEXT: br label [[COND_END]] // CHECK23: cond.end: -// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK23-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] // CHECK23-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 -// CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP10]] to i1 +// CHECK23-NEXT: br i1 [[TOBOOL1]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[CMP2:%.*]] = 
icmp sle i32 [[TMP11]], [[TMP12]] +// CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP10:%.*]] = load float, ptr [[B]], align 4, !nontemporal !6, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[CONV:%.*]] = fptosi float [[TMP10]] to i32 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP14:%.*]] = load float, ptr [[B]], align 4, !nontemporal !6, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[CONV:%.*]] = fptosi float [[TMP14]] to i32 +// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP15]] // CHECK23-NEXT: store i32 [[CONV]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK23-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND3:%.*]] -// CHECK23: omp.inner.for.cond3: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK23-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] -// CHECK23-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY5:%.*]], label [[OMP_INNER_FOR_END15:%.*]] -// CHECK23: omp.inner.for.body5: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP15]], 1 -// CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK23-NEXT: store i32 [[ADD7]], ptr [[I]], align 4 -// CHECK23-NEXT: [[B8:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP16:%.*]] = load float, ptr [[B8]], align 4 -// CHECK23-NEXT: [[CONV9:%.*]] = fptosi float [[TMP16]] to i32 -// CHECK23-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4 -// CHECK23-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [123 x i32], ptr [[A10]], i32 0, i32 [[TMP17]] -// CHECK23-NEXT: store i32 [[CONV9]], ptr [[ARRAYIDX11]], align 4 
-// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] -// CHECK23: omp.body.continue12: -// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] -// CHECK23: omp.inner.for.inc13: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK23-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND3]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK23: omp.inner.for.end15: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]] +// CHECK23: omp.inner.for.cond4: +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY6:%.*]], label [[OMP_INNER_FOR_END16:%.*]] +// CHECK23: omp.inner.for.body6: +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[MUL7:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK23-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK23-NEXT: store i32 [[ADD8]], ptr [[I]], align 4 +// CHECK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 1 +// CHECK23-NEXT: [[TMP20:%.*]] = load float, ptr [[B9]], align 4 +// CHECK23-NEXT: [[CONV10:%.*]] = fptosi float [[TMP20]] to i32 +// CHECK23-NEXT: [[A11:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[TMP2]], i32 0, i32 0 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4 +// CHECK23-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [123 x i32], ptr [[A11]], i32 0, i32 [[TMP21]] +// CHECK23-NEXT: store i32 [[CONV10]], ptr [[ARRAYIDX12]], align 4 +// CHECK23-NEXT: br label [[OMP_BODY_CONTINUE13:%.*]] +// CHECK23: omp.body.continue13: +// CHECK23-NEXT: br label [[OMP_INNER_FOR_INC14:%.*]] +// CHECK23: omp.inner.for.inc14: +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: 
[[ADD15:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK23-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK23: omp.inner.for.end16: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: -// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK23-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK23-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 +// CHECK23-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK23: .omp.final.then: // CHECK23-NEXT: store i32 123, ptr [[I]], align 4 // CHECK23-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3136,23 +3202,28 @@ // CHECK33-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK33-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK33-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK33-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK33-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3165,82 +3236,84 @@ // CHECK33-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 
// CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK33-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK33-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK33-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK33-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK33-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK33-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK33-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK33-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK33-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK33: omp.precond.then: // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK33-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK33-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: -// CHECK33-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK33-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK33-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK33-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK33-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK33-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK33-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] 
+// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK33-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK33-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK33-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK33-NEXT: br i1 [[TMP22]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK33-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK33-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: -// CHECK33-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK33-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK33-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK33-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK33-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK33-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3330,6 +3403,7 @@ // CHECK33-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK33-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK33-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -3338,16 +3412,18 @@ // CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK33-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK33-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK33-NEXT: ret void // // // CHECK33-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK33-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK33-NEXT: entry: // CHECK33-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK33-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK33-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK33-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK33-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3357,58 +3433,60 @@ // CHECK33-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK33-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK33-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK33-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK33-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK33-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK33-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK33-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK33-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK33-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK33: cond.true: // CHECK33-NEXT: br label [[COND_END:%.*]] // CHECK33: cond.false: -// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK33-NEXT: br label [[COND_END]] // CHECK33: cond.end: -// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK33-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK33-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK33-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK33-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK33-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK33-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK33-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: -// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK33-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK33-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK33-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK33-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK33: .omp.final.then: // CHECK33-NEXT: store i32 10, ptr [[I]], align 4 // CHECK33-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3529,23 +3607,28 @@ // CHECK35-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK35-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK35-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[A_ADDR]], align 4 -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK35-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK35-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3558,81 +3641,83 @@ // CHECK35-NEXT: [[I3:%.*]] = alloca i32, align 4 // 
CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK35-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK35-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK35-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK35-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK35-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK35-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK35-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK35-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK35: omp.precond.then: // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK35-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK35-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK35-NEXT: br i1 [[CMP4]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK35-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK35-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK35-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK35-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK35-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK35-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK35-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK35-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK35-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK35-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK35-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK35-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// 
CHECK35-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK35-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK35-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: -// CHECK35-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK35-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK35-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK35-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK35-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK35-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -3722,6 +3807,7 @@ // CHECK35-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK35-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK35-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -3730,16 +3816,18 @@ // CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK35-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK35-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK35-NEXT: ret void // // // CHECK35-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK35-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK35-NEXT: entry: // CHECK35-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK35-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK35-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK35-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK35-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3749,57 +3837,59 @@ // CHECK35-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK35-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK35-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK35-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK35-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK35-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK35-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK35-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK35-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK35-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK35: cond.true: // CHECK35-NEXT: br label [[COND_END:%.*]] // CHECK35: cond.false: -// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK35-NEXT: br label [[COND_END]] // CHECK35: cond.end: -// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK35-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK35-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK35-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK35-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK35-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK35-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP12]] -// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK35-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: -// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK35-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK35-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK35-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK35-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK35-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK35: .omp.final.then: // CHECK35-NEXT: store i32 10, ptr [[I]], align 4 // CHECK35-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3932,7 +4022,7 @@ // CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK37-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8 // CHECK37-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK37-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -3943,24 +4033,28 @@ // CHECK37-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK37-NEXT: 
store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK37-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK37-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK37-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK37-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK37-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK37-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK37-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP6]], align 8 +// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -3973,119 +4067,125 @@ // CHECK37-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK37-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK37-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK37-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], 
align 4 -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK37-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK37-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK37-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK37-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK37-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK37-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK37-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 +// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK37-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK37-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK37-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK37-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK37-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK37-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK37: omp.precond.then: // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK37-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK37-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK37-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK37-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK37-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK37-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK37-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK37: omp_if.then: // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK37-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK37-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK37-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK37-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK37-NEXT: br i1 [[CMP7]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK37-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK37-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK37-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK37-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK37-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_IF_END:%.*]] // 
CHECK37: omp_if.else: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK37: omp.inner.for.cond8: -// CHECK37-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK37-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END18:%.*]] -// CHECK37: omp.inner.for.body10: -// CHECK37-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK37-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK37-NEXT: store i32 [[ADD12]], ptr [[I4]], align 4 -// CHECK37-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK37-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK37-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM13]] -// CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX14]], align 4 -// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] -// CHECK37: omp.body.continue15: -// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] -// CHECK37: omp.inner.for.inc16: -// CHECK37-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK37-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP9:![0-9]+]] -// CHECK37: omp.inner.for.end18: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK37: omp.inner.for.cond9: +// CHECK37-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK37-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END19:%.*]] +// CHECK37: omp.inner.for.body11: +// CHECK37-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK37-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK37-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK37-NEXT: store i32 [[ADD13]], ptr [[I4]], align 4 +// CHECK37-NEXT: [[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK37-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP29]] to i64 +// CHECK37-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM14]] +// CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX15]], align 4 +// CHECK37-NEXT: br label [[OMP_BODY_CONTINUE16:%.*]] +// CHECK37: omp.body.continue16: +// CHECK37-NEXT: br label [[OMP_INNER_FOR_INC17:%.*]] +// CHECK37: omp.inner.for.inc17: +// CHECK37-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK37-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK37: omp.inner.for.end19: // CHECK37-NEXT: br label [[OMP_IF_END]] // CHECK37: omp_if.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK37-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK37-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK37-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK37-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: -// CHECK37-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK37-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK37-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK37-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 -// CHECK37-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] -// CHECK37-NEXT: store i32 [[ADD22]], ptr [[I4]], align 4 +// CHECK37-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK37-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK37-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK37-NEXT: [[MUL22:%.*]] = mul nsw i32 [[DIV21]], 1 +// CHECK37-NEXT: [[ADD23:%.*]] = add nsw i32 0, [[MUL22]] +// CHECK37-NEXT: store i32 [[ADD23]], ptr [[I4]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK37: .omp.final.done: // CHECK37-NEXT: br label [[OMP_PRECOND_END]] @@ -4171,6 +4271,7 @@ // CHECK37-NEXT: [[TE_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[TH_ADDR:%.*]] = alloca i64, align 8 // CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK37-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK37-NEXT: store i64 [[TE]], ptr [[TE_ADDR]], align 8 // CHECK37-NEXT: store i64 [[TH]], ptr [[TH_ADDR]], align 8 @@ -4179,16 +4280,18 @@ // CHECK37-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK37-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK37-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK37-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK37-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK37-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK37-NEXT: ret void // // // CHECK37-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK37-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK37-NEXT: entry: // CHECK37-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK37-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK37-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK37-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK37-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4198,58 +4301,60 @@ // CHECK37-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK37-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK37-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK37-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK37-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK37-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK37-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK37-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK37-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK37-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK37-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK37-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK37: cond.true: // CHECK37-NEXT: br label [[COND_END:%.*]] // CHECK37: cond.false: -// CHECK37-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK37-NEXT: br label [[COND_END]] // CHECK37: cond.end: -// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK37-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK37-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK37-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK37-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK37-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK37-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK37-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK37-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK37-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK37-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK37-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: -// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK37-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK37-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK37-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK37-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK37-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK37-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK37-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK37: .omp.final.then: // CHECK37-NEXT: store i32 10, ptr [[I]], align 4 // CHECK37-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -4382,7 +4487,7 @@ // CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK39-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK39-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK39-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -4393,24 +4498,28 @@ // 
CHECK39-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK39-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP3]] to i1 +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK39-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK39-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP7:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL1:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK39-NEXT: [[FROMBOOL2:%.*]] = zext i1 [[TOBOOL1]] to i8 -// CHECK39-NEXT: store i8 [[FROMBOOL2]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK39-NEXT: store i8 [[FROMBOOL2]], ptr [[TMP6]], align 4 +// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -4423,117 +4532,123 @@ // CHECK39-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK39-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK39-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], 
align 4 -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK39-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK39-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK39-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK39-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK39-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 4 +// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 +// CHECK39-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 +// CHECK39-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK39-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK39-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK39-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK39-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK39-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK39: omp.precond.then: // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK39-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP14]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP15]], [[TMP16]] // CHECK39-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK39-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: 
[[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 -// CHECK39-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] +// CHECK39-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK39-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP20]] to i1 +// CHECK39-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK39: omp_if.then: // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK39-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] -// CHECK39-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK39-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK39-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] +// CHECK39-NEXT: br i1 [[CMP7]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 +// CHECK39-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP23]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[TMP18:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP18]] +// CHECK39-NEXT: [[TMP24:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP24]] // CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK39-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK39-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK39-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK39-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_IF_END:%.*]] // CHECK39: omp_if.else: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8:%.*]] -// CHECK39: omp.inner.for.cond8: -// CHECK39-NEXT: 
[[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] -// CHECK39-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY10:%.*]], label [[OMP_INNER_FOR_END17:%.*]] -// CHECK39: omp.inner.for.body10: -// CHECK39-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[MUL11:%.*]] = mul nsw i32 [[TMP22]], 1 -// CHECK39-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]] -// CHECK39-NEXT: store i32 [[ADD12]], ptr [[I4]], align 4 -// CHECK39-NEXT: [[TMP23:%.*]] = load i32, ptr [[I4]], align 4 -// CHECK39-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP23]] -// CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX13]], align 4 -// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE14:%.*]] -// CHECK39: omp.body.continue14: -// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC15:%.*]] -// CHECK39: omp.inner.for.inc15: -// CHECK39-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK39-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK39: omp.inner.for.end17: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9:%.*]] +// CHECK39: omp.inner.for.cond9: +// CHECK39-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP26]], [[TMP27]] +// CHECK39-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY11:%.*]], label [[OMP_INNER_FOR_END18:%.*]] +// CHECK39: omp.inner.for.body11: +// CHECK39-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[MUL12:%.*]] = mul nsw i32 [[TMP28]], 1 +// CHECK39-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK39-NEXT: store i32 [[ADD13]], ptr [[I4]], align 4 +// CHECK39-NEXT: 
[[TMP29:%.*]] = load i32, ptr [[I4]], align 4 +// CHECK39-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP29]] +// CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX14]], align 4 +// CHECK39-NEXT: br label [[OMP_BODY_CONTINUE15:%.*]] +// CHECK39: omp.body.continue15: +// CHECK39-NEXT: br label [[OMP_INNER_FOR_INC16:%.*]] +// CHECK39: omp.inner.for.inc16: +// CHECK39-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP30]], 1 +// CHECK39-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK39: omp.inner.for.end18: // CHECK39-NEXT: br label [[OMP_IF_END]] // CHECK39: omp_if.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) -// CHECK39-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK39-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) +// CHECK39-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 +// CHECK39-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: -// CHECK39-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK39-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP29]], 0 -// CHECK39-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// 
CHECK39-NEXT: [[MUL20:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK39-NEXT: [[ADD21:%.*]] = add nsw i32 0, [[MUL20]] -// CHECK39-NEXT: store i32 [[ADD21]], ptr [[I4]], align 4 +// CHECK39-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK39-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP35]], 0 +// CHECK39-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK39-NEXT: [[MUL21:%.*]] = mul nsw i32 [[DIV20]], 1 +// CHECK39-NEXT: [[ADD22:%.*]] = add nsw i32 0, [[MUL21]] +// CHECK39-NEXT: store i32 [[ADD22]], ptr [[I4]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK39: .omp.final.done: // CHECK39-NEXT: br label [[OMP_PRECOND_END]] @@ -4619,6 +4734,7 @@ // CHECK39-NEXT: [[TE_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TH_ADDR:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK39-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK39-NEXT: store i32 [[TE]], ptr [[TE_ADDR]], align 4 // CHECK39-NEXT: store i32 [[TH]], ptr [[TH_ADDR]], align 4 @@ -4627,16 +4743,18 @@ // CHECK39-NEXT: [[TMP2:%.*]] = load i32, ptr [[TE_ADDR]], align 4 // CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[TH_ADDR]], align 4 // CHECK39-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK39-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP1]]) +// CHECK39-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK39-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK39-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK39-NEXT: ret void // // // CHECK39-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK39-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK39-NEXT: entry: // CHECK39-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK39-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK39-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK39-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK39-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -4646,57 +4764,59 @@ // CHECK39-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK39-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK39-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK39-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK39-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK39-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK39-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK39-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK39-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK39-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[TMP1]], align 4 -// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK39-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK39-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK39-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK39-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK39: cond.true: // CHECK39-NEXT: br label [[COND_END:%.*]] // CHECK39: cond.false: -// CHECK39-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK39-NEXT: br label [[COND_END]] // CHECK39: cond.end: -// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK39-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK39-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK39-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK39-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK39-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] -// CHECK39-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] 
-// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]] +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK39-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK39-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK39-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]] // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] -// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK39-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP14]] +// CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK39-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP14]] // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: -// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK39-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK39-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK39-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK39-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK39-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK39-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK39: .omp.final.then: // CHECK39-NEXT: store i32 10, ptr [[I]], align 4 // CHECK39-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp @@ -120,34 +120,34 @@ // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 -// 
CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr 
[[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -155,26 +155,29 @@ // CHECK1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i64 0, i64 0 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK1-NEXT: ret i32 [[TMP18]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: ret i32 [[TMP16]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -186,71 +189,73 @@ // CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 56087, 
ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK1-NEXT: 
[[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD9:%.*]] = add 
nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: store i32 456, ptr [[J]], align 4 @@ -288,34 +293,34 @@ // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store 
ptr [[A]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] 
= getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 56088, ptr [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// 
CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 56088, ptr [[TMP13]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28(ptr [[THIS1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -323,26 +328,29 @@ // CHECK3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_SS]], ptr [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A3]], i32 0, i32 0 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 -// CHECK3-NEXT: ret i32 [[TMP18]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: ret i32 [[TMP16]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSIiLi123ELx456EE3fooEv_l28 // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -354,69 +362,71 @@ // CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 56087, ptr [[DOTOMP_UB]], 
align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 56087 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 56087 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 56087, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 456 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 456 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: 
[[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// 
CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: store i32 456, ptr [[J]], align 4 @@ -459,39 +469,39 @@ // CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: 
[[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK5-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], 
align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 123, ptr [[I]], align 4 @@ -530,37 +540,37 @@ // CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 456 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: 
store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP5]], 456 // CHECK7-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL4]] // CHECK7-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[THIS1]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// 
CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 123, ptr [[I]], align 4 @@ -590,10 +600,10 @@ // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 @@ -618,91 +628,91 @@ // CHECK9-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP1]], [[TMP3]] // CHECK9-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 40, i1 false) -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store i64 [[TMP7]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 // CHECK9-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP9]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP25]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, 
i64 2 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP26]], align 8 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 // CHECK9-NEXT: store ptr null, ptr [[TMP27]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP28]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store i64 [[TMP3]], ptr [[TMP30]], align 8 
-// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP32]], align 8 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP33]], align 8 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[VLA]], ptr [[TMP35]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 -// CHECK9-NEXT: store ptr null, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP43:%.*]] = load i32, ptr [[M]], align 4 -// CHECK9-NEXT: store i32 [[TMP43]], ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[M]], align 4 +// CHECK9-NEXT: store i32 [[TMP32]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP33]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK9-NEXT: [[CONV5:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP45]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV5]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[TMP46:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_4]], align 8 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP46]], 1 +// CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP34]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP35]], 1 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP47]], align 4 -// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store 
i32 5, ptr [[TMP48]], align 4 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 8 -// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 8 -// CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr [[TMP41]], ptr [[TMP51]], align 8 -// CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP52]], align 8 -// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP53]], align 8 -// CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP54]], align 8 -// CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP55]], align 8 -// CHECK9-NEXT: [[TMP56:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK9-NEXT: br i1 [[TMP57]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP36]], align 4 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// 
CHECK9-NEXT: store i32 5, ptr [[TMP37]], align 4 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 8 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 8 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP30]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 [[ADD]], ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0 +// CHECK9-NEXT: br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i64 [[TMP7]], i64 [[TMP9]], i64 [[TMP1]], i64 [[TMP3]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: 
[[TMP58:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP58]]) +// CHECK9-NEXT: [[TMP47:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef signext [[TMP47]]) // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK9-NEXT: [[TMP59:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP59]]) -// CHECK9-NEXT: [[TMP60:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP60]] +// CHECK9-NEXT: [[TMP48:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK9-NEXT: call void @llvm.stackrestore(ptr [[TMP48]]) +// CHECK9-NEXT: [[TMP49:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP49]] // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 @@ -713,6 +723,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[M]], ptr [[M_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -721,169 +732,177 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i64 [[TMP0]], i64 [[TMP1]], ptr [[TMP2]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[I9:%.*]] = alloca i32, 
align 4 +// CHECK9-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK9-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK9-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK9-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK9-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK9-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK9-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 // CHECK9-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK9-NEXT: br i1 
[[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: land.lhs.true: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr 
[[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK9-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK9-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK9-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP12:%.*]] = icmp sle 
i64 [[TMP25]], [[TMP26]] +// CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK9-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK9-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK9-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK9-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK9-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK9-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK9-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK9-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK9-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK9-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK9-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK9-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK9-NEXT: [[MUL33:%.*]] = mul nsw i64 
[[SUB32]], 1 -// CHECK9-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK9-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK9-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !5 -// CHECK9-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP29]] to i64 -// CHECK9-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM36]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX37]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK9-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK9-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK9-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK9-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK9-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK9-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK9-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB20:%.*]] = sub nsw i32 
[[TMP31]], 0 +// CHECK9-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK9-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK9-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK9-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK9-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK9-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK9-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK9-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK9-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK9-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK9-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP33]] to i64 +// CHECK9-NEXT: [[TMP34:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP8]] +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 [[TMP34]] +// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP35]] to i64 +// CHECK9-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM34]] +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX35]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: 
store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK9-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK9-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK9-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK9-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB39:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK9-NEXT: [[DIV40:%.*]] = sdiv i32 [[SUB39]], 1 -// CHECK9-NEXT: [[MUL41:%.*]] = mul nsw i32 [[DIV40]], 1 -// CHECK9-NEXT: [[ADD42:%.*]] = add nsw i32 0, [[MUL41]] -// CHECK9-NEXT: store i32 [[ADD42]], ptr [[I11]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK9-NEXT: [[SUB43:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK9-NEXT: [[DIV44:%.*]] = sdiv 
i32 [[SUB43]], 1 -// CHECK9-NEXT: [[MUL45:%.*]] = mul nsw i32 [[DIV44]], 1 -// CHECK9-NEXT: [[ADD46:%.*]] = add nsw i32 0, [[MUL45]] -// CHECK9-NEXT: store i32 [[ADD46]], ptr [[J12]], align 4 +// CHECK9-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB37:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK9-NEXT: [[DIV38:%.*]] = sdiv i32 [[SUB37]], 1 +// CHECK9-NEXT: [[MUL39:%.*]] = mul nsw i32 [[DIV38]], 1 +// CHECK9-NEXT: [[ADD40:%.*]] = add nsw i32 0, [[MUL39]] +// CHECK9-NEXT: store i32 [[ADD40]], ptr [[I9]], align 4 +// CHECK9-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[SUB41:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK9-NEXT: [[DIV42:%.*]] = sdiv i32 [[SUB41]], 1 +// CHECK9-NEXT: [[MUL43:%.*]] = mul nsw i32 [[DIV42]], 1 +// CHECK9-NEXT: [[ADD44:%.*]] = add nsw i32 0, [[MUL43]] +// CHECK9-NEXT: store i32 [[ADD44]], ptr [[J10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -904,34 +923,34 @@ // CHECK9-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[A]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: 
[[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -943,18 +962,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -966,70 +988,72 @@ // CHECK9-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// 
CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM7]] -// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX8]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// 
CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK9-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: store i32 2, ptr [[J]], align 4 @@ -1090,91 +1114,91 @@ // CHECK11-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 4 // CHECK11-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes, i32 40, i1 false) -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP14]], align 4 +// 
CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 // CHECK11-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[VLA]], ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 4 -// CHECK11-NEXT: store i64 [[TMP10]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: 
[[TMP37:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr null, ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP41]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[M]], align 4 -// CHECK11-NEXT: store i32 [[TMP42]], ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP43]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP30]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[M]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP32]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP33]], 0 // 
CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] // CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 // CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[TMP45:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP45]], 1 +// CHECK11-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP34]], 1 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 5, ptr [[TMP47]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP38]], ptr [[TMP48]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP39]], ptr [[TMP49]], align 4 -// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr [[TMP40]], ptr [[TMP50]], align 4 -// CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP51]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP52]], align 4 -// CHECK11-NEXT: [[TMP53:%.*]] 
= getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP53]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP54]], align 8 -// CHECK11-NEXT: [[TMP55:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 5, ptr [[TMP36]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP27]], ptr [[TMP37]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP28]], ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP29]], ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 
+// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 [[ADD]], ptr [[TMP43]], align 8 +// CHECK11-NEXT: [[TMP44:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK11-NEXT: br i1 [[TMP45]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83(i32 [[TMP5]], i32 [[TMP7]], i32 [[TMP0]], i32 [[TMP1]], ptr [[VLA]]) #[[ATTR3:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP57:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP57]]) +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10ELi2EEiT_(i32 noundef [[TMP46]]) // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 -// CHECK11-NEXT: [[TMP58:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP58]]) -// CHECK11-NEXT: [[TMP59:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP59]] +// CHECK11-NEXT: [[TMP47:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK11-NEXT: call void @llvm.stackrestore(ptr [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP48]] // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l83 @@ -1185,6 +1209,7 @@ // 
CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -1193,167 +1218,175 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[N_ADDR]], ptr [[M_ADDR]], i32 [[TMP0]], i32 [[TMP1]], ptr [[TMP2]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[M_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[M_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_5:%.*]] = alloca i64, align 8 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J12:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP13]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB6:%.*]] = sub nsw i32 [[TMP8]], 0 -// CHECK11-NEXT: [[DIV7:%.*]] = sdiv i32 [[SUB6]], 1 -// CHECK11-NEXT: [[CONV8:%.*]] = sext i32 [[DIV7]] to i64 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV8]] -// CHECK11-NEXT: [[SUB9:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK11-NEXT: store i64 [[SUB9]], ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP14]], 0 +// CHECK11-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK11-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 // CHECK11-NEXT: store i32 0, ptr [[J]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP15]] // CHECK11-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: land.lhs.true: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: store i64 [[TMP17]], ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK11-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 -// CHECK11-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +// CHECK11-NEXT: call void 
@__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP19]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i64 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_5]], align 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i64 [ [[TMP22]], [[COND_TRUE]] ], [ [[TMP23]], [[COND_FALSE]] ] // CHECK11-NEXT: store i64 [[COND]], ptr [[DOTOMP_UB]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 -// CHECK11-NEXT: store i64 [[TMP18]], ptr [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8 +// CHECK11-NEXT: store i64 [[TMP24]], ptr [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group 
[[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP12:%.*]] = icmp sle i64 [[TMP25]], [[TMP26]] +// CHECK11-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP22]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK11-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK11-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP21]], [[CONV18]] -// CHECK11-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK11-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP25]], 0 -// CHECK11-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK11-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK11-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP24]], [[CONV25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP26]], 0 -// CHECK11-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK11-NEXT: [[CONV30:%.*]] = sext i32 
[[MUL29]] to i64 -// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK11-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP23]], [[MUL31]] -// CHECK11-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK11-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK11-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK11-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP3]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group !6 -// CHECK11-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX36]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP28]], 0 +// CHECK11-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK11-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK11-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK11-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP27]], [[CONV16]] +// CHECK11-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK11-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK11-NEXT: store i32 [[CONV19]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], 
align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP31]], 0 +// CHECK11-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK11-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK11-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP30]], [[CONV23]] +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP32]], 0 +// CHECK11-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK11-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK11-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK11-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK11-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP29]], [[MUL29]] +// CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK11-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK11-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK11-NEXT: store i32 [[CONV33]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP34:%.*]] = mul nsw i32 [[TMP33]], [[TMP8]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP34]] +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP35]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 -// CHECK11-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 
[[ADD37]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP36]], 1 +// CHECK11-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP32]]) -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = icmp ne i32 [[TMP33]], 0 -// CHECK11-NEXT: br i1 [[TMP34]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP37:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP37]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP38]]) +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 +// CHECK11-NEXT: br i1 [[TMP40]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB38:%.*]] = sub nsw i32 [[TMP35]], 0 -// CHECK11-NEXT: [[DIV39:%.*]] = sdiv i32 [[SUB38]], 1 -// CHECK11-NEXT: [[MUL40:%.*]] = mul nsw i32 [[DIV39]], 1 -// CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 0, [[MUL40]] -// CHECK11-NEXT: store i32 [[ADD41]], ptr [[I11]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4 -// CHECK11-NEXT: [[SUB42:%.*]] = sub nsw i32 [[TMP36]], 0 -// CHECK11-NEXT: 
[[DIV43:%.*]] = sdiv i32 [[SUB42]], 1 -// CHECK11-NEXT: [[MUL44:%.*]] = mul nsw i32 [[DIV43]], 1 -// CHECK11-NEXT: [[ADD45:%.*]] = add nsw i32 0, [[MUL44]] -// CHECK11-NEXT: store i32 [[ADD45]], ptr [[J12]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB36:%.*]] = sub nsw i32 [[TMP41]], 0 +// CHECK11-NEXT: [[DIV37:%.*]] = sdiv i32 [[SUB36]], 1 +// CHECK11-NEXT: [[MUL38:%.*]] = mul nsw i32 [[DIV37]], 1 +// CHECK11-NEXT: [[ADD39:%.*]] = add nsw i32 0, [[MUL38]] +// CHECK11-NEXT: store i32 [[ADD39]], ptr [[I9]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP42]], 0 +// CHECK11-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1 +// CHECK11-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1 +// CHECK11-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]] +// CHECK11-NEXT: store i32 [[ADD43]], ptr [[J10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: // CHECK11-NEXT: br label [[OMP_PRECOND_END]] @@ -1374,34 +1407,34 @@ // CHECK11-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[A]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: 
store ptr [[A]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 
-// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 20, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.2, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.3, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: 
store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 20, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB2]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiLi10ELi2EEiT__l69(ptr [[A]]) #[[ATTR3]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1413,18 +1446,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(80) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1436,68 +1472,70 @@ // CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 19, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, 
ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 19 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 19 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 19, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 2 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP10]], 2 +// CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL4]] +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, 
!llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP12]] -// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD6]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[TMP2]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP14]] +// CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, 
ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK11-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK11-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: store i32 2, ptr [[J]], align 4 @@ -1583,13 +1621,13 @@ // CHECK13-NEXT: store i64 [[TMP13]], ptr [[DOTOMP_IV]], align 8 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP17]], 0 // CHECK13-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK13-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ 
-1598,16 +1636,16 @@ // CHECK13-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK13-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP20]], 0 // CHECK13-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 // CHECK13-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK13-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK13-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP19]], [[CONV22]] -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP21]], 0 // CHECK13-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK13-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1617,22 +1655,22 @@ // CHECK13-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK13-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK13-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !2 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr 
[[I9]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK13-NEXT: [[TMP23:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP3]] // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i64 [[TMP23]] -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[IDXPROM33:%.*]] = sext i32 [[TMP24]] to i64 // CHECK13-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i64 [[IDXPROM33]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group !2 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX34]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: [[ADD35:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !2 +// CHECK13-NEXT: store i64 [[ADD35]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP2]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1677,38 +1715,38 @@ // CHECK13-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK13-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK13-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK13-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] 
+// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM6]] -// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, ptr [[ARRAYIDX7]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 10, ptr [[I]], align 4 @@ -1784,13 +1822,13 @@ // CHECK15-NEXT: store i64 [[TMP11]], ptr [[DOTOMP_IV]], align 8 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, 
!llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[CMP11:%.*]] = icmp sle i64 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP15]], 0 // CHECK15-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK15-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]] @@ -1799,16 +1837,16 @@ // CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]] // CHECK15-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP18]], 0 // CHECK15-NEXT: [[DIV20:%.*]] = 
sdiv i32 [[SUB19]], 1 // CHECK15-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] // CHECK15-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64 // CHECK15-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP17]], [[CONV22]] -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP19]], 0 // CHECK15-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK15-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] @@ -1818,20 +1856,20 @@ // CHECK15-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1 // CHECK15-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]] // CHECK15-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32 -// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group !3 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, ptr [[I9]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[TMP21:%.*]] = mul nsw i32 [[TMP20]], [[TMP1]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[VLA]], i32 [[TMP21]] -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX]], i32 [[TMP22]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group !3 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX33]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], 
align 8, !llvm.access.group !3 +// CHECK15-NEXT: [[TMP23:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 [[TMP23]], 1 -// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group !3 +// CHECK15-NEXT: store i64 [[ADD34]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 @@ -1876,36 +1914,36 @@ // CHECK15-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP3]], 2 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD]], ptr 
[[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP5]], 2 // CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 2 // CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], [[MUL3]] // CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group !7 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], ptr [[A]], i32 0, i32 [[TMP6]] -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP7]] -// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 0, ptr [[ARRAYIDX6]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[ADD7]], 
ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 10, ptr [[I]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -272,18 +272,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -293,59 +296,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -357,18 +362,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -378,59 +386,61 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// 
CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -442,18 +452,21 @@ // CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -463,76 +476,78 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: -// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 123, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -688,18 +703,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -709,58 +727,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -772,18 +792,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -793,58 +816,60 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] +// CHECK3-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP13]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -856,18 +881,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -877,75 +905,77 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 122, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call 
void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 61) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 122 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 122 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 122, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[TMP2]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], ptr [[A]], i32 0, i32 [[TMP13]] // CHECK3-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 123, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -1457,23 +1487,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1486,82 +1521,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store 
ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP9:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP9]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1579,23 +1616,28 @@ // CHECK9-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -1608,82 +1650,84 @@ // CHECK9-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// 
CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = 
add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] -// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP15]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK9-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK9-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -1702,7 +1746,7 @@ // CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8 // CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 @@ -1710,22 +1754,26 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP1]], i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP0]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: 
[[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -1738,101 +1786,105 @@ // CHECK9-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK9-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK9-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK9-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK9: omp.precond.then: // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: 
[[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, 
!llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP26]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] -// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] +// CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK9-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK9-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: store i32 [[ADD10]], ptr 
[[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK9-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK9-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK9-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK9-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK9-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -1971,18 +2023,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1992,58 +2047,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: 
store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP21]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2055,18 +2112,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2076,58 +2136,60 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: 
store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] -// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] +// CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK9-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK9-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2139,18 +2201,21 @@ // CHECK9-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -2160,75 +2225,77 @@ // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: 
store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK9-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK9: omp.dispatch.cond: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 -// CHECK9-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 [[IDXPROM]] // CHECK9-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP27]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = 
load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK9: omp.dispatch.end: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK9-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 10, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -2495,23 +2562,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined., ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2524,81 +2596,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 
// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: 
[[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP10]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP10]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2616,23 +2690,28 @@ // CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @.omp_outlined..1, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2645,81 +2724,83 @@ // CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP8]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 
-// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP9]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: 
-// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16:![0-9]+]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP17]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP21]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP16]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP23]], 0 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP27]], 0 // CHECK11-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1 // CHECK11-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1 // CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]] @@ -2738,7 +2819,7 @@ // CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 @@ -2746,22 +2827,26 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..4, ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[N_ADDR]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]], i32 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 
+// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 @@ -2774,100 +2859,104 @@ // CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 // CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 // CHECK11-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: store i32 0, ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] // CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK11: omp.precond.then: // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) +// CHECK11-NEXT: 
[[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP15]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP18]], [[COND_TRUE]] ], [ [[TMP19]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP21]], [[TMP22]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19:![0-9]+]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP23]], [[TMP24]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 [[TMP20]] +// 
CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[TMP26]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], 1 // CHECK11-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP19]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 
[[ADD10]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP33]]) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK11-NEXT: br i1 [[TMP35]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP36]], 0 // CHECK11-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1 // CHECK11-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1 // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]] @@ -3006,18 +3095,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3027,57 +3119,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], 
align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], 
align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP22]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3089,18 +3183,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..10, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3110,57 +3207,59 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP11]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP25]] -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK11-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 -// CHECK11-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] @@ -3172,18 +3271,21 @@ // CHECK11-SAME: (ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..13, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..13 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[A:%.*]]) #[[ATTR2]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -3193,74 +3295,76 @@ // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store i32 0, ptr 
[[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 10) // CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 9 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP28]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]] +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 [[TMP13]] // CHECK11-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK11-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load 
i32, ptr [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK11: omp.dispatch.end: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 10, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -342,8 +342,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -352,145 +351,155 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK1-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done3: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done1: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 [[IDXPROM7]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[VAR4]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// 
CHECK1-NEXT: store i32 [[ADD9]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done12: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done9: // CHECK1-NEXT: ret void // // @@ -699,7 +708,7 @@ // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -708,23 +717,27 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -732,119 +745,123 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK1-NEXT: [[_TMP7:%.*]] 
= alloca ptr, align 8 +// CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK1-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: 
store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef 
nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK1-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done2: +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 
dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// 
CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 [[IDXPROM9]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], 
ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP22]] to i64 +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM6]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK1-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i64 2 +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK1-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ 
[[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done13: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done10: // CHECK1-NEXT: ret void // // @@ -1143,8 +1160,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1153,143 +1169,153 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 noundef [[SIVAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK3-NEXT: 
[[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN2StD1Ev(ptr 
noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done3: -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP2]], ptr noundef [[AGG_TMP5]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP5]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done1: +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP8]], ptr noundef [[AGG_TMP2]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 
dereferenceable(8) [[AGG_TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX7]], ptr align 4 [[VAR4]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP20:%.*]] = load 
i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK3-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP25]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN10]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP30]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN10]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE11:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done11: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done8: // CHECK3-NEXT: ret void // // @@ -1498,7 +1524,7 @@ // CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1507,23 +1533,27 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..3, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP5]]) +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 noundef [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1531,117 +1561,121 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 4 -// CHECK3-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK3-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK3-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 // CHECK3-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK3-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK3-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 -// CHECK3-NEXT: 
[[_TMP7:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK3-NEXT: [[AGG_TMP3:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC2]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK3-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK3-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK3: omp.arraycpy.body: -// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK3-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr noundef [[AGG_TMP]]) // CHECK3-NEXT: call void 
@_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR2]] // CHECK3-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK3-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK3: omp.arraycpy.done4: -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP4]], ptr noundef [[AGG_TMP6]]) -// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR2]] -// CHECK3-NEXT: store ptr [[VAR5]], ptr [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP9]] +// CHECK3-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] +// CHECK3: omp.arraycpy.done2: +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: call void @_ZN2StC1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr noundef 
nonnull align 4 dereferenceable(4) [[VAR]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP10]], ptr noundef [[AGG_TMP3]]) +// CHECK3-NEXT: call void @_ZN2StD1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[AGG_TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP4]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP12]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP10]], 
[[TMP11]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP7]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, 
ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP20]] +// CHECK3-NEXT: store i32 [[TMP19]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP22]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// 
CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP25]]) +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK3-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i32 2 +// CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] +// CHECK3-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP22]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP28]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 
dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] -// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN11]] -// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE12:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK3: arraydestroy.done12: +// CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK3: arraydestroy.done9: // CHECK3-NEXT: ret void // // @@ -2514,35 +2548,33 @@ // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_CASTED:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load volatile i32, ptr [[TMP2]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[G1_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = 
getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr @g, align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 3, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP4]], i64 [[TMP6]]) +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[G:%.*]], i64 noundef [[G1:%.*]], i64 noundef [[SIVAR:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[G:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[G1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = 
alloca i32, align 4 @@ -2554,68 +2586,76 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP9]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP4]] +// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 1, ptr [[G_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G_ADDR]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP15]], align 4, !llvm.access.group 
[[ACC_GRP4]] +// CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP16]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP8]]) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -163,25 +163,31 @@ // CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SVAR]], ptr [[SVAR_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 // CHECK1-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G_ADDR]], ptr [[TMP0]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -189,104 +195,106 @@ // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK1-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G2]], ptr [[TMP14]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr 
[[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP19]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 8, !llvm.access.group [[ACC_GRP4]] // 
CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK1-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK1-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 -// CHECK1-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK1-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK1-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK1-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 8 +// CHECK1-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP32]], ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK1-NEXT: store float [[TMP33]], ptr [[TMP8]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -328,6 +336,7 @@ // CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 
4 // CHECK3-NEXT: store i32 [[SVAR]], ptr [[SVAR_ADDR]], align 4 @@ -341,20 +350,25 @@ // CHECK3-NEXT: [[TMP4:%.*]] = load double, ptr [[TMP3]], align 4 // CHECK3-NEXT: store double [[TMP4]], ptr [[G13]], align 8 // CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @.omp_outlined., ptr [[G2]], ptr [[TMP5]], ptr [[SVAR_ADDR]], ptr [[SFVAR_ADDR]]) +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP4]], align 4 +// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR_ADDR]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[G1:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SFVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SFVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -362,104 +376,106 @@ // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G2:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[G13:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SVAR5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[SFVAR6:%.*]] = alloca float, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[G1:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[SFVAR:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr 
[[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G1]], ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[G1_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SFVAR_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[G13]], ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store double 1.000000e+00, ptr [[G2]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store i32 3, ptr [[SVAR5]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR6]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G2]], ptr [[TMP14]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr 
[[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP4]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[SVAR5]], ptr [[TMP17]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[SFVAR6]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store double 1.000000e+00, ptr [[G]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store volatile double 1.000000e+00, ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store i32 3, ptr [[SVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store float 4.000000e+00, ptr [[SFVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP19]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[SVAR]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[SFVAR]], ptr [[TMP23]], align 4, !llvm.access.group [[ACC_GRP5]] // 
CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK3-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP11]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK3-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[G2]], align 8 -// CHECK3-NEXT: store volatile double [[TMP24]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 4 -// CHECK3-NEXT: store volatile double [[TMP26]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load float, ptr [[SFVAR6]], align 4 -// CHECK3-NEXT: store float [[TMP28]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double, ptr [[G]], align 8 +// CHECK3-NEXT: store volatile double [[TMP29]], ptr [[TMP2]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load double, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store volatile double [[TMP31]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP32]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load float, ptr [[SFVAR]], align 4 +// CHECK3-NEXT: store float [[TMP33]], ptr [[TMP8]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK3: .omp.lastprivate.done: // CHECK3-NEXT: ret void @@ -655,6 +671,7 @@ // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr 
[[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -664,21 +681,27 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -686,31 +709,33 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca 
[[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP10:%.*]] 
= load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -720,106 +745,106 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr 
[[TMP12]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK9-NEXT: br i1 
[[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM9]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX10]], ptr align 4 [[TMP16]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 
[[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP20]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP22]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP25:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN12]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN12]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE13:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: 
[[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN12]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE13]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done13: -// CHECK9-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i64 4, i1 false) -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK9-NEXT: store 
i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i64 4, i1 false) +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP34]], ptr [[TMP10]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN14:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN14]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr 
[[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN14]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done15: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -997,6 +1022,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -1005,20 +1031,25 @@ // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: 
[[VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1026,28 +1057,30 @@ // CHECK9-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK9-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], 
align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: @@ -1057,104 +1090,104 @@ // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK9-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK9-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK9-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[IDXPROM8:%.*]] = sext 
i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP15]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP21]] to i64 +// CHECK9-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP20]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK9-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK9-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK9-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK9-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK9-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i64 8, i1 false) -// 
CHECK9-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN11]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN7]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE8:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN7]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i64 4, i1 false) // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr 
[[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done12: -// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP4]], ptr align 4 [[TMP26]], i64 4, i1 false) +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE8]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done8: +// CHECK9-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i64 4, i1 false) // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN13]], i64 2 +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN9:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN9]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: 
[[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN13]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done14: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN9]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE10:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done10: // CHECK9-NEXT: ret void // // @@ -1356,6 +1389,7 @@ // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1365,21 +1399,27 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]], ptr [[SVAR_ADDR]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: store ptr [[SVAR_ADDR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1387,31 +1427,33 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SVAR7:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 +// 
CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[SVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[SVAR]], ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[SVAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1421,104 +1463,104 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 1 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = 
load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 [[TMP17]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX9]], ptr align 4 [[TMP16]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: store i32 [[TMP20]], 
ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP23]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP22]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP20]]) -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 -// CHECK11-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], 
align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP26]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0 -// CHECK11-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN11:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN11]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN11]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE12:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[TMP6]], i32 0, 
i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN11]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP26]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE12]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done12: -// CHECK11-NEXT: [[TMP27:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP5]], ptr align 4 [[TMP27]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[SVAR7]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: 
[[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP32]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP33:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP11]], ptr align 4 [[TMP33]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, ptr [[SVAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP34]], ptr [[TMP10]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN13:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN13]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP29]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP35]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN13]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE14:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done14: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // @@ -1696,6 +1738,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1704,20 +1747,25 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @.omp_outlined..1, ptr [[TMP0]], ptr [[T_VAR_ADDR]], ptr [[TMP1]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// 
CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 @@ -1725,28 +1773,30 @@ // CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC3:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR4:%.*]] = alloca [2 x %struct.S.0], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 -// CHECK11-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[_TMP2:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store ptr [[TMP8]], ptr [[TMP]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: @@ -1756,102 +1806,102 @@ // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) -// CHECK11-NEXT: store ptr [[VAR5]], ptr [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) +// CHECK11-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP11]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP12]], 1 // CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK11: cond.true: // CHECK11-NEXT: br label [[COND_END:%.*]] // CHECK11: cond.false: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK11-NEXT: br label [[COND_END]] // CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] // CHECK11-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] +// CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[T_VAR2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC3]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[_TMP6]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[I]], 
align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX8]], ptr align 4 [[TMP15]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP20]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP22]], 1 +// CHECK11-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP19]]) -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0 -// CHECK11-NEXT: br i1 [[TMP21]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP24]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK11: .omp.final.then: // CHECK11-NEXT: store i32 2, ptr [[I]], align 4 // CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0 -// CHECK11-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK11-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR2]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP0]], ptr align 4 [[VEC3]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN10:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr 
[[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN10]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN10]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE11:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP2]], ptr align 4 [[VEC]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN6]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE7:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR4]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN10]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[S_ARR]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN6]], [[DOTOMP_LASTPRIVATE_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr align 4 [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 4, i1 false) // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: 
[[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP25]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE11]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done11: -// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[_TMP6]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP4]], ptr align 4 [[TMP26]], i32 4, i1 false) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP30]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE7]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done7: +// CHECK11-NEXT: [[TMP31:%.*]] = load ptr, ptr [[_TMP2]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP9]], ptr align 4 [[TMP31]], i32 4, i1 false) // CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN12:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR4]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN12]], i32 2 +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP27]], [[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP32]], 
[[DOTOMP_LASTPRIVATE_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN12]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE13:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done13: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done9: // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -275,15 +275,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK1-SAME: () #[[ATTR4:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -298,6 +300,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -313,75 +317,75 @@ // CHECK1-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 
// CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 [[IDXPROM2]] // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false), !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD4]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr 
@[[GLOB1]], i32 [[TMP16]]) +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK1-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN6]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -478,15 +482,17 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK1-SAME: () #[[ATTR4]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -502,6 +508,8 @@ // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -519,72 +527,72 @@ // CHECK1: arrayctor.cont: // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK1-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: 
[[IDXPROM4:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 [[IDXPROM4]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP10]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX5]], ptr align 4 [[TMP11]], i64 4, i1 false), !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// 
CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] @@ -808,15 +816,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l94 // CHECK3-SAME: () #[[ATTR4:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -831,6 +841,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -846,73 +858,73 @@ // CHECK3-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] 
+// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP10]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 [[TMP11]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX2]], ptr align 4 [[VAR]], i32 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD3]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load 
i32, ptr [[TMP15]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP16]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 +// CHECK3-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN5:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN5]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP19]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN5]] @@ -1009,15 +1021,17 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l56 // CHECK3-SAME: () #[[ATTR4]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined..3) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 @@ -1033,6 +1047,8 @@ // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 @@ -1050,70 +1066,70 @@ // CHECK3: arrayctor.cont: // CHECK3-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) // CHECK3-NEXT: store ptr [[VAR]], ptr [[_TMP2]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 
[[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 [[TMP8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP10]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 [[TMP10]] +// CHECK3-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 [[TMP12]] +// CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX4]], ptr align 4 [[TMP11]], i32 4, i1 false), !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP14]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// 
CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP15]]) +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAY_BEGIN6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN6]], i32 2 // CHECK3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK3: arraydestroy.body: -// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP17]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP18]], [[DOTOMP_FINAL_DONE]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK3-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR2]] // CHECK3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN6]] @@ -2065,17 +2081,19 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[G1_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // CHECK9-NEXT: store i64 [[G1]], ptr [[G1_ADDR]], align 8 // CHECK9-NEXT: store ptr [[G1_ADDR]], ptr [[TMP]], align 8 -// CHECK9-NEXT: call void (ptr, 
i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 @@ -2091,66 +2109,68 @@ // CHECK9-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 // CHECK9-NEXT: store ptr [[G1]], ptr [[_TMP2]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], 
ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 1 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 1, ptr [[G]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store volatile i32 1, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store volatile i32 1, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[G]], ptr [[TMP9]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[G]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[_TMP2]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) +// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -97,34 +97,34 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK1-NEXT: br i1 [[TMP19]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label 
[[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63(i64 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -137,18 +137,21 @@ // CHECK1-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -159,81 +162,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load 
i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP5]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 8 +// 
CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -247,15 +252,15 @@ // 
CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -271,41 +276,41 @@ // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: 
store i64 [[TMP2]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK1-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i64 [[TMP1]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -315,18 +320,21 @@ // 
CHECK1-SAME: (i64 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -337,81 +345,83 @@ // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr 
[[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // 
CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, 
!llvm.access.group !11 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP11]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// 
CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: // CHECK1-NEXT: store i32 2, ptr [[I]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK1-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK1-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK1-NEXT: ] // CHECK1: .omp.reduction.case1: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.case2: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK1-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK1: .omp.reduction.default: // CHECK1-NEXT: ret void @@ -425,15 +435,15 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, 
ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK1-NEXT: ret void // // @@ -459,34 +469,34 @@ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr 
[[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 -// CHECK3-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, 
ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB3:[0-9]+]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l63(i32 [[TMP1]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -499,18 +509,21 @@ // CHECK3-SAME: (i32 noundef [[SIVAR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] 
= alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -521,81 +534,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 4 -// 
CHECK3-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store 
i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // 
CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[SIVAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// 
CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -609,15 +624,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -633,41 +648,41 @@ // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP9]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 2, ptr [[TMP18]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 2, ptr [[TMP15]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 
@__tgt_target_kernel(ptr @[[GLOB3]], i64 -1, i32 0, i32 1, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 +// CHECK3-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l32(i32 [[TMP1]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -677,18 +692,21 @@ // CHECK3-SAME: (i32 noundef [[T_VAR:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[T_VAR_ADDR]]) +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[T_VAR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -699,81 +717,83 @@ // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[T_VAR1]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: store i32 0, ptr [[T_VAR]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // 
CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12:![0-9]+]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[T_VAR1]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP12]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK3: .omp.final.then: // CHECK3-NEXT: store i32 2, ptr [[I]], align 4 // CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[T_VAR1]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP2]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) +// 
CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[T_VAR]], ptr [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2]], i32 [[TMP4]], i32 1, i32 4, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func.2, ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: switch i32 [[TMP17]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK3-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK3-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK3-NEXT: ] // CHECK3: .omp.reduction.case1: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.case2: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP20]] monotonic, align 4 -// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP20]] monotonic, align 4 +// CHECK3-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr 
@.gomp_critical_user_.reduction.var) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK3: .omp.reduction.default: // CHECK3-NEXT: ret void @@ -787,15 +807,15 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 // CHECK3-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK3-NEXT: ret void // // @@ -824,26 +844,26 @@ // CHECK5-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group 
[[ACC_GRP2]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 @@ -870,37 +890,37 @@ // CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group 
!6 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK5-NEXT: ret i32 0 // @@ -923,26 +943,26 @@ // CHECK7-NEXT: 
store i32 0, ptr [[SIVAR]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !3 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP3]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] -// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD1]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group !3 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 @@ -969,37 +989,37 @@ // CHECK7-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK7-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: store i32 0, ptr [[T_VAR1]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// 
CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]] -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP5]], [[TMP4]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[T_VAR1]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] +// CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP6]], 1 +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP7]] // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 2, ptr [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR1]], align 4 -// CHECK7-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR1]], align 4 +// CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP7]], [[TMP8]] // CHECK7-NEXT: store i32 [[ADD4]], ptr [[T_VAR]], align 4 // CHECK7-NEXT: ret i32 0 // @@ -1018,18 +1038,21 @@ // CHECK9-SAME: (i64 noundef [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[SIVAR_ADDR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP0]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR1:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -1041,84 +1064,86 @@ // CHECK9-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: store i32 0, ptr [[SIVAR1]], align 4 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: store i32 0, ptr [[SIVAR]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP4]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP5]], 1 // CHECK9-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK9: cond.true: // CHECK9-NEXT: br label [[COND_END:%.*]] // CHECK9: cond.false: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK9-NEXT: br label [[COND_END]] // CHECK9: cond.end: -// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK9-NEXT: [[COND:%.*]] = phi i32 [ 1, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ] // CHECK9-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] -// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4:![0-9]+]] +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK9-NEXT: store i32 [[ADD3]], ptr [[SIVAR1]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP11]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group !4 +// CHECK9-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK9-NEXT: store i32 [[ADD2]], ptr [[SIVAR]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(8) [[REF_TMP]]), !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] +// CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK9-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP4]] // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: -// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK9-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK9-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK9: .omp.final.then: // 
CHECK9-NEXT: store i32 2, ptr [[I]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK9-NEXT: store ptr [[SIVAR1]], ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK9-NEXT: store ptr [[SIVAR]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_reduce(ptr @[[GLOB2:[0-9]+]], i32 [[TMP4]], i32 1, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @.omp.reduction.reduction_func, ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: switch i32 [[TMP18]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ // CHECK9-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] // CHECK9-NEXT: i32 2, label [[DOTOMP_REDUCTION_CASE2:%.*]] // CHECK9-NEXT: ] // CHECK9: .omp.reduction.case1: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK9-NEXT: store i32 [[ADD5]], ptr [[TMP0]], align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] +// CHECK9-NEXT: store i32 [[ADD4]], ptr [[TMP2]], align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.case2: -// 
CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR1]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP21]] monotonic, align 4 -// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP2]], ptr @.gomp_critical_user_.reduction.var) +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP2]], i32 [[TMP21]] monotonic, align 4 +// CHECK9-NEXT: call void @__kmpc_end_reduce(ptr @[[GLOB2]], i32 [[TMP4]], ptr @.gomp_critical_user_.reduction.var) // CHECK9-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]] // CHECK9: .omp.reduction.default: // CHECK9-NEXT: ret void @@ -1132,15 +1157,15 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK9-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 // CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP2]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP10]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP8]], [[TMP9]] +// CHECK9-NEXT: store i32 [[ADD]], ptr [[TMP7]], align 4 // CHECK9-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -173,41 +173,47 @@ // CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i64 [[TMP3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK1-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], 
ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK1-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP7]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -235,41 +241,47 @@ // CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 -// CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i32 [[TMP3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, 
ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK3-NEXT: store i32 [[TMP5]], ptr [[G]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G]], align 128 +// CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP7]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -458,8 +470,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 
-// CHECK9-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -468,77 +479,87 @@ // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP6]]) +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr 
[[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i64 
8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done3: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) 
[[TMP2]], ptr [[AGG_TMP5]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i64 4, i1 false) +// 
CHECK9-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done8: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -589,24 +610,29 @@ // CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// 
CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i64 [[TMP1]]) +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR]], align 4 // CHECK9-NEXT: ret void // // @@ -614,11 +640,11 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK9-NEXT: entry: // CHECK9-NEXT: 
[[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 @@ -628,9 +654,9 @@ // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -721,12 +747,12 @@ // CHECK9: 
omp_offload.cont6: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -848,6 +874,7 @@ // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 8 // CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 @@ -858,72 +885,79 @@ // CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], 
align 128 // CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[T_VAR1]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, 
align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], 
[[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 128 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], 
[[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done1: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds 
[[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done5: // CHECK9-NEXT: ret void // // @@ -955,27 +989,32 @@ // CHECK9-NEXT: entry: // CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 // CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[T_VAR1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 128 // CHECK9-NEXT: ret void // // @@ -985,7 +1024,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = 
getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -999,7 +1038,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1015,9 +1054,9 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1219,8 +1258,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 
-// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1229,77 +1267,87 @@ // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @.omp_outlined., ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP6]]) +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 4 -// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S], align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S:%.*]], align 4 -// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S:%.*]], align 4 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 
-// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC1]], ptr align 4 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[SIVAR]], align 4 +// CHECK11-NEXT: call void 
@llvm.memcpy.p0.p0.i32(ptr align 4 [[VEC]], ptr align 4 [[TMP2]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done3: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) -// CHECK11-NEXT: call void 
@_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX6]], ptr align 4 [[VAR4]], i32 4, i1 false) -// CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN7]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP11]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIfEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 
+// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX3]], ptr align 4 [[VAR]], i32 4, i1 false) +// CHECK11-NEXT: store i32 2, ptr [[SIVAR]], align 4 +// CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP13]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done8: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // @@ -1350,24 +1398,29 @@ // CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 
4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK11-NEXT: store i32 
[[TMP2]], ptr [[T_VAR]], align 4 // CHECK11-NEXT: ret void // // @@ -1375,11 +1428,11 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 @@ -1389,9 +1442,9 @@ // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 
dereferenceable(4) [[VAR]], i32 3) // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 @@ -1482,12 +1535,12 @@ // CHECK11: omp_offload.cont6: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP41]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1609,6 +1662,7 @@ // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // 
CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 @@ -1619,72 +1673,79 @@ // CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[T_VAR1]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK11-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 +// CHECK11-NEXT: [[AGG_TMP2:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr 
[[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[TMP0]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// 
CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP4]], align 128 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[T_VAR]], align 128 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 [[TMP2]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP10]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// 
CHECK11: omp.arraycpy.done4: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_1]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done1: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR]], ptr nonnull align 4 dereferenceable(4) [[TMP8]], ptr [[AGG_TMP2]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) 
[[AGG_TMP2]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX3]], ptr align 128 [[VAR]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN4:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN4]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_ARRAYCPY_DONE1]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done9: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN4]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5:%.*]], label 
[[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done5: // CHECK11-NEXT: ret void // // @@ -1716,27 +1777,32 @@ // CHECK11-NEXT: entry: // CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[T_VAR1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// 
CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 128 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 128 // CHECK11-NEXT: ret void // // @@ -1746,7 +1812,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void @@ -1760,7 +1826,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr @g, align 128 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] @@ -1776,9 +1842,9 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[TMP0]], i32 0, i32 0 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[F2]], align 4 // CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[T]], i32 0, i32 0 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4 @@ -1938,6 +2004,7 @@ // CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 @@ -1950,61 +2017,72 @@ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined., ptr [[TMP4]], ptr [[N_ADDR]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]]) +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], 
align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP6]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP8]], i1 false) -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i64 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 signext [[TMP11]], ptr [[TMP12]]) -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP15]], align 128 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i64 [[TMP17]], i1 false) +// CHECK17-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i64 0 +// CHECK17-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[TMP3]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: call void @_ZN2St7St_funcEPS_iPg(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 signext [[TMP20]], ptr [[TMP21]]) +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK17-NEXT: ret void // // @@ -2168,6 +2246,7 @@ // CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK17-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 // CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 @@ -2181,74 +2260,85 @@ // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 // CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP4]], i64 [[TMP1]], i64 [[TMP2]], ptr [[TMP3]], ptr [[N_ADDR]], ptr [[TMP6]]) +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK17-NEXT: store i64 [[TMP0]], ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK17-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK17-NEXT: store i64 [[TMP1]], ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK17-NEXT: store i64 [[TMP2]], ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK17-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK17-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[S_ADDR]], align 8 +// CHECK17-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK17-NEXT: ret void // // // CHECK17-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i64 [[VLA2:%.*]], i64 [[VLA4:%.*]], ptr nonnull align 8 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK17-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[VLA_ADDR3:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA_ADDR5:%.*]] = alloca i64, align 8 -// CHECK17-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8 -// CHECK17-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 8 +// CHECK17-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: store i64 [[VLA2]], ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: store i64 [[VLA4]], ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: store ptr 
[[VLA26]], ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR3]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR5]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave() -// CHECK17-NEXT: store ptr [[TMP6]], ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[VLA7:%.*]] = alloca double, i64 [[TMP7]], align 128 -// CHECK17-NEXT: store i64 [[TMP2]], ptr [[__VLA_EXPR0]], align 8 -// CHECK17-NEXT: store i64 [[TMP3]], ptr [[__VLA_EXPR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]] -// CHECK17-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 8 -// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i64 [[TMP9]], i1 false) -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[A]], align 4 -// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double -// CHECK17-NEXT: [[TMP11:%.*]] = mul nsw i64 1, [[TMP3]] -// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA7]], i64 [[TMP11]] -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 1 +// CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[__CONTEXT_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 7 +// CHECK17-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() +// CHECK17-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[VLA:%.*]] = alloca double, i64 [[TMP16]], align 128 +// CHECK17-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR0]], align 8 +// CHECK17-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR1]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP7]], [[TMP9]] +// CHECK17-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK17-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VLA]], ptr align 128 [[TMP11]], i64 [[TMP18]], i1 false) +// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// 
CHECK17-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK17-NEXT: store i32 [[TMP19]], ptr [[A]], align 4 +// CHECK17-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP9]] +// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i64 [[TMP20]] +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 1 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB]] to i64 -// CHECK17-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] -// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX8]], align 8 -// CHECK17-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to ppc_fp128 -// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 8 -// CHECK17-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[B10]], align 4 -// CHECK17-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds ppc_fp128, ptr [[TMP13]], i64 [[IDXPROM11]] -// CHECK17-NEXT: store ppc_fp128 [[CONV9]], ptr [[ARRAYIDX12]], align 16 -// CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP15]]) +// CHECK17-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i64 [[IDXPROM]] +// CHECK17-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// CHECK17-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to ppc_fp128 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK17-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP23]] to i64 +// 
CHECK17-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds ppc_fp128, ptr [[TMP22]], i64 [[IDXPROM4]] +// CHECK17-NEXT: store ppc_fp128 [[CONV2]], ptr [[ARRAYIDX5]], align 16 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// CHECK17-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) // CHECK17-NEXT: ret void // // @@ -2392,6 +2482,7 @@ // CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 // CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 @@ -2404,61 +2495,72 @@ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined., ptr [[TMP4]], ptr [[N_ADDR]], i32 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]]) +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[S_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP14]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr [[S:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], i32 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[A:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], ptr nonnull align 4 dereferenceable(8) [[VLA26:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], ptr [[VLA_ADDR3]], 
align 4 -// CHECK19-NEXT: store i32 [[VLA4]], ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store ptr [[VLA26]], ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = call ptr @llvm.stacksave() -// CHECK19-NEXT: store ptr [[TMP5]], ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP6]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP7]], 8 -// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i32 [[TMP8]], i1 false) -// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP9]], i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP10]], i32 [[TMP11]], ptr [[TMP12]]) -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP13]]) +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave() +// CHECK19-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP15]], align 128 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP16]], 8 +// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA]], ptr align 128 [[TMP13]], i32 [[TMP17]], i1 false) +// CHECK19-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP18]], i32 0 +// CHECK19-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = 
load i32, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: call void @_ZN2St7St_funcEPS_iPe(ptr nonnull align 4 dereferenceable(8) [[ARRAYIDX]], ptr [[TMP19]], i32 [[TMP20]], ptr [[TMP21]]) +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP22]]) // CHECK19-NEXT: ret void // // @@ -2620,6 +2722,7 @@ // CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK19-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 // CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 @@ -2633,72 +2736,83 @@ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 8, ptr @.omp_outlined..1, i32 [[TMP0]], ptr [[TMP5]], ptr [[TMP4]], i32 [[TMP1]], i32 [[TMP2]], ptr [[TMP3]], ptr [[N_ADDR]], ptr [[TMP6]]) +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP0]], ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP7]], ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK19-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK19-NEXT: store i32 [[TMP1]], ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 4 +// CHECK19-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 5 +// CHECK19-NEXT: store ptr [[TMP3]], ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 6 +// CHECK19-NEXT: store ptr [[N_ADDR]], ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 7 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[S_ADDR]], align 4 +// CHECK19-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK19-NEXT: ret void // // // CHECK19-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[VLA:%.*]], ptr [[VLA1:%.*]], ptr [[THIS:%.*]], i32 [[VLA2:%.*]], i32 [[VLA4:%.*]], ptr nonnull align 4 dereferenceable(8) [[VLA26:%.*]], ptr nonnull align 4 dereferenceable(4) [[N:%.*]], ptr [[S:%.*]]) #[[ATTR2]] { +// CHECK19-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR2]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA1_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[VLA_ADDR3:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA_ADDR5:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: [[VLA2_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4 -// CHECK19-NEXT: [[S_ADDR:%.*]] = alloca ptr, align 4 +// CHECK19-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[VLA1]], ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[VLA2]], ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: store i32 [[VLA4]], ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: store ptr 
[[VLA26]], ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: store ptr [[S]], ptr [[S_ADDR]], align 4 -// CHECK19-NEXT: [[TMP0:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR3]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR5]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VLA2_ADDR]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[N_ADDR]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = call ptr @llvm.stacksave() -// CHECK19-NEXT: store ptr [[TMP6]], ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[VLA7:%.*]] = alloca double, i32 [[TMP7]], align 128 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[__VLA_EXPR0]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[__VLA_EXPR1]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = mul nuw i32 [[TMP2]], [[TMP3]] -// CHECK19-NEXT: [[TMP9:%.*]] = mul nuw i32 [[TMP8]], 8 -// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA7]], ptr align 128 [[TMP4]], i32 [[TMP9]], i1 false) -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 0 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[A]], align 4 -// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double -// CHECK19-NEXT: [[TMP11:%.*]] = mul nsw i32 1, [[TMP3]] -// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA7]], i32 [[TMP11]] -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i32 [[SUB]] -// CHECK19-NEXT: store double 
[[CONV]], ptr [[ARRAYIDX8]], align 8 -// CHECK19-NEXT: [[CONV9:%.*]] = fpext double [[CONV]] to x86_fp80 -// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[VLA1_ADDR]], align 4 -// CHECK19-NEXT: [[B10:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP1]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[B10]], align 4 -// CHECK19-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: store x86_fp80 [[CONV9]], ptr [[ARRAYIDX11]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP15]]) +// CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 3 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 5 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 6 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], 
i32 0, i32 7 +// CHECK19-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave() +// CHECK19-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[VLA:%.*]] = alloca double, i32 [[TMP16]], align 128 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[__VLA_EXPR0]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR1]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP7]], [[TMP9]] +// CHECK19-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK19-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VLA]], ptr align 128 [[TMP11]], i32 [[TMP18]], i1 false) +// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 0 +// CHECK19-NEXT: store i32 [[TMP19]], ptr [[A]], align 4 +// CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double +// CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP9]] +// CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VLA]], i32 [[TMP20]] +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP21]], 1 +// CHECK19-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX]], i32 [[SUB]] +// CHECK19-NEXT: store double [[CONV]], ptr [[ARRAYIDX1]], align 8 +// CHECK19-NEXT: [[CONV2:%.*]] = fpext double [[CONV]] to x86_fp80 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[B3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP5]], i32 0, i32 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[B3]], align 4 +// CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds x86_fp80, ptr [[TMP22]], i32 [[TMP23]] +// CHECK19-NEXT: store x86_fp80 [[CONV2]], ptr [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr 
[[SAVED_STACK]], align 4 +// CHECK19-NEXT: call void @llvm.stackrestore(ptr [[TMP24]]) // CHECK19-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_private_codegen.cpp b/clang/test/OpenMP/teams_private_codegen.cpp --- a/clang/test/OpenMP/teams_private_codegen.cpp +++ b/clang/test/OpenMP/teams_private_codegen.cpp @@ -211,34 +211,34 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK1-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK1-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK1-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK1-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK1-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48(ptr [[THIS1]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -250,18 +250,21 @@ // 
CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -270,20 +273,22 @@ // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// 
CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK1-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store ptr [[B]], ptr [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8 // CHECK1-NEXT: call void 
@_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 8 dereferenceable(32) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -317,26 +322,30 @@ // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK1-SAME: () #[[ATTR3]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 +// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 1, ptr [[G]], align 128 // CHECK1-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G]], ptr 
[[TMP0]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G]], ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -396,34 +405,34 @@ // CHECK3-NEXT: store ptr [[TMP0]], ptr [[C]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: 
[[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK3-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK3-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK3-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK3-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// 
CHECK3-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 
0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48(ptr [[THIS1]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -435,18 +444,21 @@ // CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 4 // CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[TMP0]]) +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -455,20 +467,22 @@ // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK3-NEXT: store ptr [[A]], ptr [[TMP]], align 4 // CHECK3-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK3-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr 
[[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store ptr [[B]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK3-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 2 +// CHECK3-NEXT: store ptr [[B]], ptr [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK3-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4 // CHECK3-NEXT: call void @_ZZN2SSC1ERiENKUlvE_clEv(ptr noundef nonnull align 4 dereferenceable(16) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -502,26 +516,30 @@ // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l117 // CHECK3-SAME: () #[[ATTR3]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..1) +// CHECK3-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 1 +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[G:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 +// CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_2:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK3-NEXT: store i32 1, ptr [[G]], align 128 // CHECK3-NEXT: store i32 2, ptr [[SIVAR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G]], ptr [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR]], ptr [[TMP2]], align 4 // CHECK3-NEXT: call void 
@"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr noundef nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -554,27 +572,27 @@ // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK9-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// 
CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: 
[[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136() #[[ATTR4:[0-9]+]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -583,18 +601,18 @@ // CHECK9-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done1: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // 
CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -636,15 +654,17 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK9-SAME: () #[[ATTR3:[0-9]+]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -652,6 +672,8 @@ // CHECK9-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -663,18 +685,18 @@ // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], 
label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i64 4, i1 false) // CHECK9-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -697,62 +719,62 @@ // CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { 
// CHECK9-NEXT: entry: // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK9-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK9-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) -// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef signext 1) -// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 +// CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef signext 3) // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 6 // CHECK9-NEXT: store ptr null, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK9-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK9-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86() #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: 
[[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK9: arraydestroy.done1: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP13]] +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP12]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -778,34 +800,34 @@ // CHECK9-NEXT: store ptr [[TMP0]], ptr [[C]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 
8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK9-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48(ptr [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -817,18 +839,21 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -836,21 +861,23 @@ // CHECK9-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 // CHECK9-NEXT: store ptr [[C]], ptr [[_TMP1]], align 8 
-// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK9-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK9-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK9-NEXT: ret void // // @@ -924,44 +951,48 @@ // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK9-SAME: () #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK9: arrayctor.loop: // CHECK9-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], 
[[ARRAYCTOR_LOOP]] ] // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i64 1 +// CHECK9-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i64 1 // CHECK9-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK9-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK9: arrayctor.cont: // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i64 4, i1 false) // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i64 2 +// CHECK9-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = 
phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -985,7 +1016,7 @@ // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: store i32 0, ptr [[F]], align 4 // CHECK9-NEXT: ret void // @@ -1003,34 +1034,34 @@ // CHECK9-NEXT: store i32 0, ptr [[A]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr 
[[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK9-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, 
ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK9-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: // CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64(ptr [[THIS1]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1042,29 +1073,34 @@ // CHECK9-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[TMP:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[A]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK9-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 
// CHECK9-NEXT: ret void // // @@ -1076,7 +1112,7 @@ // CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK9-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK9-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK9-NEXT: ret void @@ -1119,27 +1155,27 @@ // CHECK11-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], float noundef 2.000000e+00) // CHECK11-NEXT: call void @_ZN1SIfEC1Ef(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], float noundef 3.000000e+00) // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// 
CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 
[[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136() #[[ATTR4:[0-9]+]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1148,18 +1184,18 @@ // CHECK11-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done1: // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN2SSC1ERi @@ -1201,15 +1237,17 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l136 // CHECK11-SAME: () #[[ATTR3:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined.) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 4 @@ -1217,6 +1255,8 @@ // CHECK11-NEXT: [[SIVAR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] @@ -1228,18 +1268,18 @@ // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIfEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 4 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x 
%struct.S], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[ARRAYIDX1]], ptr align 4 [[VAR]], i32 4, i1 false) // CHECK11-NEXT: store i32 3, ptr [[SIVAR]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] @@ -1262,62 +1302,62 @@ // CHECK11-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK11-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_1:%.*]], align 4 // CHECK11-NEXT: [[SST:%.*]] = alloca [[STRUCT_SST:%.*]], align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x 
%struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1]], align 128 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: call void @_ZN3SSTIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[SST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) -// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) -// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 +// CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 0, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK11-NEXT: store i32 1, ptr [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 0, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 // CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 // CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 // CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 // CHECK11-NEXT: store ptr null, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 // CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// 
CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP9]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 -// CHECK11-NEXT: br i1 [[TMP11]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP8]], align 8 +// CHECK11-NEXT: [[TMP9:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0 +// CHECK11-NEXT: br i1 [[TMP10]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86() #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi 
ptr [ [[TMP12]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP11]], [[OMP_OFFLOAD_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] // CHECK11: arraydestroy.done1: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP13]] +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP12]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1343,34 +1383,34 @@ // CHECK11-NEXT: store ptr [[TMP0]], ptr [[C]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: 
[[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP16]], align 8 -// CHECK11-NEXT: [[TMP17:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0 -// CHECK11-NEXT: br i1 [[TMP18]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 
4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CHECK11-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK11-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2SSC1ERi_l48(ptr [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1382,18 +1422,21 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..1, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -1401,21 +1444,23 @@ // CHECK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 4 // CHECK11-NEXT: store ptr [[C]], ptr [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[B]], align 4 -// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP5]], -1 // CHECK11-NEXT: store i32 [[DEC]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 -// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP7]], 1 +// CHECK11-NEXT: store i32 [[DIV]], ptr [[TMP6]], align 4 // CHECK11-NEXT: ret void // // @@ -1489,44 +1534,48 @@ // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l86 // CHECK11-SAME: () #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @.omp_outlined..2) +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 1 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..2, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 -// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.1], align 128 +// CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_1:%.*]], align 128 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYCTOR_LOOP:%.*]] // CHECK11: arrayctor.loop: // CHECK11-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ 
[[ARRAYCTOR_NEXT:%.*]], [[ARRAYCTOR_LOOP]] ] // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYCTOR_CUR]], i32 1 +// CHECK11-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYCTOR_CUR]], i32 1 // CHECK11-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq ptr [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]] // CHECK11-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]] // CHECK11: arrayctor.cont: // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR]], align 128 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX1]], ptr align 128 [[VAR]], i32 4, i1 false) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN2]], i32 2 +// CHECK11-NEXT: [[ARRAY_BEGIN2:%.*]] = getelementptr inbounds [2 x %struct.S.1], ptr [[S_ARR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAY_BEGIN2]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// 
CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP3]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ARRAYCTOR_CONT]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN2]] // CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE3:%.*]], label [[ARRAYDESTROY_BODY]] @@ -1550,7 +1599,7 @@ // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: store i32 0, ptr [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1568,34 +1617,34 @@ // CHECK11-NEXT: store i32 0, ptr [[A]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[THIS1]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 1, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP15]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0 -// CHECK11-NEXT: br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 1, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.5, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] 
= getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 +// CHECK11-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: // CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN3SSTIiEC1Ev_l64(ptr [[THIS1]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] @@ -1607,29 +1656,34 @@ // CHECK11-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 4 // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[TMP0]]) +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..3, ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[A:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[TMP:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 +// CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[A]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK11-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK11-NEXT: store i32 
[[INC]], ptr [[TMP3]], align 4 // CHECK11-NEXT: ret void // // @@ -1641,7 +1695,7 @@ // CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 -// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_1:%.*]], ptr [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], ptr [[F]], align 4 // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp --- a/clang/test/OpenMP/tile_codegen.cpp +++ b/clang/test/OpenMP/tile_codegen.cpp @@ -880,15 +880,17 @@ // CHECK1-LABEL: define {{[^@]+}}@foo6 // CHECK1-SAME: () #[[ATTR0]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -900,68 +902,70 @@ // CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK1-NEXT: store i32 7, ptr [[I]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr 
@[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND:%.*]] // CHECK1: for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK1-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK1-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK1: cond.true4: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false5: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK1-NEXT: br i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK1: for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: 
[[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK1-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP14]]) // CHECK1-NEXT: br label [[FOR_INC:%.*]] // CHECK1: for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: for.end: @@ -969,14 +973,14 @@ // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK1-NEXT: ret void // // @@ -1874,15 +1878,17 @@ // CHECK2-LABEL: define {{[^@]+}}@foo6 // CHECK2-SAME: () #[[ATTR2]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @.omp_outlined.) 
+// CHECK2-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 +// CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @.omp_outlined., ptr [[OMP_OUTLINED_ARG_AGG_]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR5:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 +// CHECK2-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 @@ -1894,68 +1900,70 @@ // CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 // CHECK2-NEXT: store i32 7, ptr [[I]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 0 +// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1]], i32 [[TMP2]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 0 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 0, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: store i32 [[TMP8]], ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND:%.*]] // CHECK2: for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 5 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 5 // CHECK2-NEXT: [[CMP3:%.*]] = icmp slt i32 4, [[ADD2]] // CHECK2-NEXT: br i1 [[CMP3]], label [[COND_TRUE4:%.*]], label [[COND_FALSE5:%.*]] // CHECK2: cond.true4: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false5: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 5 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 4, [[COND_TRUE4]] ], [ [[ADD6]], [[COND_FALSE5]] ] -// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP9]], [[COND8]] +// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 [[TMP10]], [[COND8]] // CHECK2-NEXT: br 
i1 [[CMP9]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] // CHECK2: for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP12]], 3 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP13]], 3 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 7, [[MUL10]] // CHECK2-NEXT: store i32 [[ADD11]], ptr [[I]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP13]]) +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I]], align 4 +// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP14]]) // CHECK2-NEXT: br label [[FOR_INC:%.*]] // CHECK2: for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4 // CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: for.end: @@ -1963,14 +1971,14 @@ // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP2]]) // CHECK2-NEXT: ret void // // diff --git 
a/clang/test/OpenMP/vla_crash.c b/clang/test/OpenMP/vla_crash.c --- a/clang/test/OpenMP/vla_crash.c +++ b/clang/test/OpenMP/vla_crash.c @@ -27,6 +27,7 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) @@ -34,51 +35,58 @@ // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @a, align 4 // CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[B]], ptr [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store i64 [[TMP4]], ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 3 +// CHECK1-NEXT: store ptr [[C]], ptr [[TMP8]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], ptr [[B]], i64 [[TMP4]], ptr [[C]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @.omp_outlined.(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2:[0-9]+]] // 
CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]], i64 noundef [[VLA1:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8 +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0 +// 
CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 1 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr @a, align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP2]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i64 [[TMP7]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds ptr, ptr [[ARRAYIDX3]], i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARRAYIDX4]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr @a, align 4 -// CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 0, [[TMP0]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX7]], i64 0 -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[TMP0]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP9]], i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYIDX]], 
align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr @a, align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 +// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP6]] +// CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds ptr, ptr [[TMP10]], i64 [[TMP12]] +// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds ptr, ptr [[ARRAYIDX1]], i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr @a, align 4 +// CHECK1-NEXT: [[IDXPROM3:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM3]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = mul nsw i64 0, [[TMP2]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP17]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[ARRAYIDX5]], i64 0 +// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX6]], align 4 // CHECK1-NEXT: ret void // // @@ -88,6 +96,7 @@ // CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[P:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) @@ -96,32 +105,38 @@ // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 // CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[P]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 0 +// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8 +// 
CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[P]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[OMP_OUTLINED_ARG_AGG_]], i32 0, i32 2 +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[TMP5]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], ptr [[P]], ptr [[A_ADDR]]) #[[ATTR2]] +// CHECK1-NEXT: call void @.omp_outlined..1(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[OMP_OUTLINED_ARG_AGG_]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[P:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 -// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[P_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp eq ptr [[TMP3]], [[TMP2]] +// CHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[TMP0]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp eq ptr [[TMP7]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK1: if.then: // CHECK1-NEXT: br label [[IF_END]] diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.generated.expected @@ -34,9 +34,10 @@ // OMP-NEXT: entry: // OMP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // OMP-NEXT: store 
i32 0, i32* [[RETVAL]], align 4 // OMP-NEXT: store i32 0, i32* [[I]], align 4 -// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: call void @foo() // OMP-NEXT: ret i32 0 // @@ -45,6 +46,7 @@ // OMP-NEXT: entry: // OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// OMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon*, align 8 // OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -54,61 +56,64 @@ // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 // OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// OMP-NEXT: store %struct.anon* [[__CONTEXT:%.*]], %struct.anon** [[__CONTEXT_ADDR]], align 8 +// OMP-NEXT: [[TMP0:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR]], align 8 // OMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // OMP-NEXT: store i32 33554431, i32* [[DOTOMP_UB]], align 4 // OMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // OMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// OMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], 
i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 33554431 +// OMP-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 33554431 // OMP-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // OMP: cond.true: // OMP-NEXT: br label [[COND_END:%.*]] // OMP: cond.false: -// OMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // OMP-NEXT: br label [[COND_END]] // OMP: cond.end: -// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // OMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// OMP-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// OMP-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // OMP: omp.inner.for.cond: -// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // OMP-NEXT: br 
i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // OMP: omp.inner.for.body: -// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // OMP-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// OMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // OMP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]] // OMP-NEXT: store double 0.000000e+00, double* [[ARRAYIDX]], align 8 // OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // OMP: omp.body.continue: // OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // OMP: omp.inner.for.inc: -// OMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // OMP-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND]] // OMP: omp.inner.for.end: // OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // OMP: omp.loop.exit: -// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // OMP-NEXT: ret void // // // OMP-LABEL: @foo( // OMP-NEXT: entry: // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // OMP-NEXT: store i32 0, i32* [[I]], align 4 -// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: ret void // // @@ -116,6 +121,7 @@ // OMP-NEXT: entry: // OMP-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // OMP-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// OMP-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca %struct.anon.0*, align 8 // OMP-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // OMP-NEXT: [[TMP:%.*]] = alloca i32, align 4 // OMP-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 @@ -125,53 +131,55 @@ // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 // OMP-NEXT: store i32* [[DOTGLOBAL_TID_:%.*]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // OMP-NEXT: store i32* [[DOTBOUND_TID_:%.*]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// OMP-NEXT: store %struct.anon.0* [[__CONTEXT:%.*]], %struct.anon.0** [[__CONTEXT_ADDR]], align 8 +// OMP-NEXT: [[TMP0:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR]], align 8 // OMP-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // OMP-NEXT: store i32 33554431, i32* [[DOTOMP_UB]], align 4 // OMP-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // OMP-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// OMP-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// OMP-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 
33554431 +// OMP-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// OMP-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// OMP-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// OMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 33554431 // OMP-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // OMP: cond.true: // OMP-NEXT: br label [[COND_END:%.*]] // OMP: cond.false: -// OMP-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // OMP-NEXT: br label [[COND_END]] // OMP: cond.end: -// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// OMP-NEXT: [[COND:%.*]] = phi i32 [ 33554431, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] // OMP-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// OMP-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// OMP-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // OMP: omp.inner.for.cond: -// OMP-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// OMP-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// OMP-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // OMP-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // OMP: omp.inner.for.body: -// OMP-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // OMP-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // OMP-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// OMP-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 -// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64 +// OMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// OMP-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // OMP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [33554432 x double], [33554432 x double]* @A, i64 0, i64 [[IDXPROM]] // OMP-NEXT: store double 1.000000e+00, double* [[ARRAYIDX]], align 8 // OMP-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // OMP: omp.body.continue: // OMP-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // OMP: omp.inner.for.inc: -// OMP-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 +// OMP-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// OMP-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 // OMP-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 // OMP-NEXT: br label [[OMP_INNER_FOR_COND]] // OMP: omp.inner.for.end: // OMP-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // OMP: omp.loop.exit: -// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// OMP-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // OMP-NEXT: ret void // // diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs.c.no-generated.expected @@ -13,9 +13,10 @@ // OMP-NEXT: entry: // OMP-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // OMP-NEXT: [[I:%.*]] = alloca i32, align 
4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1 // OMP-NEXT: store i32 0, i32* [[RETVAL]], align 4 // OMP-NEXT: store i32 0, i32* [[I]], align 4 -// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) +// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: call void @foo() // OMP-NEXT: ret i32 0 // @@ -62,8 +63,9 @@ // OMP-LABEL: @foo( // OMP-NEXT: entry: // OMP-NEXT: [[I:%.*]] = alloca i32, align 4 +// OMP-NEXT: [[OMP_OUTLINED_ARG_AGG_:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 1 // OMP-NEXT: store i32 0, i32* [[I]], align 4 -// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// OMP-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), %struct.anon.0* [[OMP_OUTLINED_ARG_AGG_]]) // OMP-NEXT: ret void // // NOOMP-LABEL: @foo( diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -448,7 +448,7 @@ __OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1) __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32, - VoidPtr, VoidPtr, VoidPtrPtr, SizeTy) + VoidPtr, VoidPtr, VoidPtr) __OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) __OMP_RTL(__kmpc_kernel_end_parallel, false, Void, ) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) @@ -465,9 +465,8 @@ __OMP_RTL(__kmpc_alloc_shared, false, VoidPtr, SizeTy) __OMP_RTL(__kmpc_free_shared, false, Void, VoidPtr, SizeTy) -__OMP_RTL(__kmpc_begin_sharing_variables, false, Void, VoidPtrPtrPtr, SizeTy) -__OMP_RTL(__kmpc_end_sharing_variables, false, Void, ) -__OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr) +__OMP_RTL(__kmpc_alloc_aggregate_arg, false, VoidPtr, VoidPtr, VoidPtr) +__OMP_RTL(__kmpc_get_shared_variables_aggregate, false, Void, VoidPtrPtr) __OMP_RTL(__kmpc_parallel_level, false, Int8, ) __OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, ) __OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32) @@ -928,6 +927,9 @@ __OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(), ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(AllocatedPointer)))) +__OMP_RTL_ATTRS(__kmpc_alloc_aggregate_arg, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) + __OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_aligned_alloc, DefaultAttrs, 
ReturnPtrAttrs, ParamAttrs()) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4315,6 +4315,7 @@ case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: case OMPRTL___kmpc_nvptx_end_reduce_nowait: + case OMPRTL___kmpc_alloc_aggregate_arg: break; case OMPRTL___kmpc_distribute_static_init_4: case OMPRTL___kmpc_distribute_static_init_4u: diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -77,11 +77,11 @@ ; CHECK-NEXT: ret void ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR1]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8* ; CHECK-NEXT: call void @helper0() #[[ATTR1]] ; CHECK-NEXT: call void @helper1() #[[ATTR1]] ; CHECK-NEXT: call void @helper2() #[[ATTR1]] -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP1]]) ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; @@ -96,11 +96,11 @@ user_code.entry: %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) - %1 = 
bitcast [0 x i8*]* %captured_vars_addrs to i8** + %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8* call void @helper0() call void @helper1() call void @helper2() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* %1) call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } @@ -204,7 +204,7 @@ declare i32 @__kmpc_get_hardware_num_threads_in_block_dummy() declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) #1 declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) #1 -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8*) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll --- a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_fold.ll @@ -47,9 +47,9 @@ ; CHECK-NEXT: ret void ; CHECK: user_code.entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; CHECK-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8* ; CHECK-NEXT: call void @is_spmd_helper2() -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* 
@__omp_outlined___wrapper to i8*), i8** [[TMP1]], i64 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* [[TMP1]]) ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; @@ -64,9 +64,9 @@ user_code.entry: %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) - %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8** + %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8* call void @is_spmd_helper2() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** %1, i64 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8* %1) call void @__kmpc_target_deinit(%struct.ident_t* null, i8 1, i1 true) ret void } @@ -200,7 +200,7 @@ declare i8 @__kmpc_is_spmd_exec_mode() declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1 zeroext, i1 zeroext) declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i8, i1 zeroext) -declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) +declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8*) declare i32 @__kmpc_global_thread_num(%struct.ident_t*) declare void @foo() declare void @bar() diff --git a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll --- a/llvm/test/Transforms/OpenMP/parallel_level_fold.ll +++ b/llvm/test/Transforms/OpenMP/parallel_level_fold.ll @@ -51,13 +51,13 @@ ; CHECK-LABEL: define {{[^@]+}}@parallel() { ; CHECK-NEXT: [[I:%.*]] = call 
i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) ; CHECK-NEXT: call void @spmd_helper() -; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8* null) ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ; CHECK-NEXT: ret void ; %i = call i32 @__kmpc_target_init(%struct.ident_t* null, i8 2, i1 false, i1 false) call void @spmd_helper() - call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8** null, i64 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 0, i32 0, i32 0, i8* null, i8* null, i8* null) call void @__kmpc_target_deinit(%struct.ident_t* null, i8 2, i1 false) ret void } @@ -106,9 +106,9 @@ ret void } -define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) { +define internal void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8*) { ; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51 -; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8** [[TMP7:%.*]], i64 [[TMP8:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: (%struct.ident_t* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]], i32 [[TMP4:%.*]], i8* [[TMP5:%.*]], i8* [[TMP6:%.*]], i8* [[TMP7:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: call void @parallel_helper() ; CHECK-NEXT: ret void ; diff --git a/openmp/libomptarget/DeviceRTL/include/Interface.h b/openmp/libomptarget/DeviceRTL/include/Interface.h --- a/openmp/libomptarget/DeviceRTL/include/Interface.h +++ b/openmp/libomptarget/DeviceRTL/include/Interface.h @@ -182,25 +182,22 @@ /// memory configured at launch. 
void *__kmpc_get_dynamic_shared(); -/// Allocate sufficient space for \p NumArgs sequential `void*` and store the -/// allocation address in \p GlobalArgs. -/// -/// Called by the main thread prior to a parallel region. -/// -/// We also remember it in GlobalArgsPtr to ensure the worker threads and -/// deallocation function know the allocation address too. -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs); - -/// Deallocate the memory allocated by __kmpc_begin_sharing_variables. -/// -/// Called by the main thread after a parallel region. -void __kmpc_end_sharing_variables(); +/// Return a pointer in memory to store the aggregate argument of an outlined +/// parallel region, either using a local memory allocation when in SPMD mode +/// expected in \p LocalPtr, or from shared memory, expected in \p GlobalPtr. +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr); -/// Store the allocation address obtained via __kmpc_begin_sharing_variables in +/// Store the pointer of the struct aggregating shared variables in /// \p GlobalArgs. /// /// Called by the worker threads in the parallel region (function). -void __kmpc_get_shared_variables(void ***GlobalArgs); +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs); + +/// Store the pointer of the struct aggregating shared variables in \p args +/// to team-shared memory for worker threads to access. +/// +/// Called by the main thread to initiate parallel region execution. +void __kmpc_set_shared_variables_aggregate(void *args); /// External interface to get the thread ID. 
uint32_t __kmpc_get_hardware_thread_id_in_block(); diff --git a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen b/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen deleted file mode 100644 --- a/openmp/libomptarget/DeviceRTL/include/generated_microtask_cases.gen +++ /dev/null @@ -1,405 +0,0 @@ -case 0: -((void (*)(int32_t *, int32_t * -))fn)(&global_tid, &bound_tid -); -break; -case 1: -((void (*)(int32_t *, int32_t * -, void *))fn)(&global_tid, &bound_tid -, args[0]); -break; -case 2: -((void (*)(int32_t *, int32_t * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1]); -break; -case 3: -((void (*)(int32_t *, int32_t * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2]); -break; -case 4: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -); -break; -case 5: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4]); -break; -case 6: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5]); -break; -case 7: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6]); -break; -case 8: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -); -break; -case 9: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8]); -break; -case 10: -((void 
(*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9]); -break; -case 11: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10]); -break; -case 12: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -); -break; -case 13: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12]); -break; -case 14: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13]); -break; -case 15: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14]); -break; -case 16: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, 
void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -); -break; -case 17: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16]); -break; -case 18: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17]); -break; -case 19: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18]); -break; -case 20: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -); -break; -case 21: -((void (*)(int32_t *, int32_t * -, void *, void *, 
void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20]); -break; -case 22: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21]); -break; -case 23: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22]); -break; -case 24: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -); -break; -case 25: -((void (*)(int32_t *, int32_t * -, void *, 
void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24]); -break; -case 26: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25]); -break; -case 27: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26]); -break; -case 28: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] 
-, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -); -break; -case 29: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28]); -break; -case 30: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29]); -break; -case 31: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], 
args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30]); -break; -case 32: -((void (*)(int32_t *, int32_t * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -, void *, void *, void *, void * -))fn)(&global_tid, &bound_tid -, args[0], args[1], args[2], args[3] -, args[4], args[5], args[6], args[7] -, args[8], args[9], args[10], args[11] -, args[12], args[13], args[14], args[15] -, args[16], args[17], args[18], args[19] -, args[20], args[21], args[22], args[23] -, args[24], args[25], args[26], args[27] -, args[28], args[29], args[30], args[31] -); -break; diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -63,16 +63,11 @@ return NumThreads; } -// Invoke an outlined parallel function unwrapping arguments (up to 32). +// Invoke an outlined parallel function. 
void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn, - void **args, int64_t nargs) { + void *args) { DebugEntryRAII Entry(__FILE__, __LINE__, ""); - switch (nargs) { -#include "generated_microtask_cases.gen" - default: - PRINT("Too many arguments in kmp_invoke_microtask, aborting execution.\n"); - __builtin_trap(); - } + ((void (*)(int32_t *, int32_t *, void *))fn)(&global_tid, &bound_tid, args); } } // namespace @@ -81,7 +76,7 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr, int32_t num_threads, int proc_bind, void *fn, - void *wrapper_fn, void **args, int64_t nargs) { + void *wrapper_fn, void *args) { FunctionTracingRAII(); uint32_t TId = mapping::getThreadIdInBlock(); @@ -98,7 +93,8 @@ (config::mayUseNestedParallelism() && icv::Level))) { state::DateEnvironmentRAII DERAII(ident); ++icv::Level; - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); + state::exitDataEnvironment(); return; } @@ -132,7 +128,7 @@ icv::Level.assert_eq(1u, ident, /* ForceTeamState */ true); if (TId < NumThreads) - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); // Synchronize all threads at the end of a parallel region. 
synchronize::threadsAligned(); @@ -158,70 +154,11 @@ bool IsActiveParallelRegion = NumThreads > 1; if (!IsActiveParallelRegion) { state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true, ident); - invokeMicrotask(TId, 0, fn, args, nargs); + invokeMicrotask(TId, 0, fn, args); return; } - void **GlobalArgs = nullptr; - if (nargs) { - __kmpc_begin_sharing_variables(&GlobalArgs, nargs); - switch (nargs) { - default: - for (int I = 0; I < nargs; I++) - GlobalArgs[I] = args[I]; - break; - case 16: - GlobalArgs[15] = args[15]; - [[fallthrough]]; - case 15: - GlobalArgs[14] = args[14]; - [[fallthrough]]; - case 14: - GlobalArgs[13] = args[13]; - [[fallthrough]]; - case 13: - GlobalArgs[12] = args[12]; - [[fallthrough]]; - case 12: - GlobalArgs[11] = args[11]; - [[fallthrough]]; - case 11: - GlobalArgs[10] = args[10]; - [[fallthrough]]; - case 10: - GlobalArgs[9] = args[9]; - [[fallthrough]]; - case 9: - GlobalArgs[8] = args[8]; - [[fallthrough]]; - case 8: - GlobalArgs[7] = args[7]; - [[fallthrough]]; - case 7: - GlobalArgs[6] = args[6]; - [[fallthrough]]; - case 6: - GlobalArgs[5] = args[5]; - [[fallthrough]]; - case 5: - GlobalArgs[4] = args[4]; - [[fallthrough]]; - case 4: - GlobalArgs[3] = args[3]; - [[fallthrough]]; - case 3: - GlobalArgs[2] = args[2]; - [[fallthrough]]; - case 2: - GlobalArgs[1] = args[1]; - [[fallthrough]]; - case 1: - GlobalArgs[0] = args[0]; - [[fallthrough]]; - case 0: - break; - } - } + __kmpc_set_shared_variables_aggregate(args); { // Note that the order here is important. `icv::Level` has to be updated @@ -243,9 +180,6 @@ // Master waits for workers to signal. 
synchronize::threads(); } - - if (nargs) - __kmpc_end_sharing_variables(); } __attribute__((noinline)) bool diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -425,41 +425,21 @@ void *llvm_omp_get_dynamic_shared() { return __kmpc_get_dynamic_shared(); } -/// Allocate storage in shared memory to communicate arguments from the main -/// thread to the workers in generic mode. If we exceed -/// NUM_SHARED_VARIABLES_IN_SHARED_MEM we will malloc space for communication. -constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64; - -[[clang::loader_uninitialized]] static void - *SharedMemVariableSharingSpace[NUM_SHARED_VARIABLES_IN_SHARED_MEM]; -#pragma omp allocate(SharedMemVariableSharingSpace) \ - allocator(omp_pteam_mem_alloc) -[[clang::loader_uninitialized]] static void **SharedMemVariableSharingSpacePtr; -#pragma omp allocate(SharedMemVariableSharingSpacePtr) \ - allocator(omp_pteam_mem_alloc) - -void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) { - FunctionTracingRAII(); - if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) { - SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0]; - } else { - SharedMemVariableSharingSpacePtr = (void **)memory::allocGlobal( - nArgs * sizeof(void *), "new extended args"); - ASSERT(SharedMemVariableSharingSpacePtr != nullptr && - "Nullptr returned by malloc!"); - } - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void *__kmpc_alloc_aggregate_arg(void *LocalPtr, void *GlobalPtr) { + return (__kmpc_is_spmd_exec_mode() ? 
LocalPtr : GlobalPtr); } -void __kmpc_end_sharing_variables() { - FunctionTracingRAII(); - if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0]) - memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args"); +/// Allocate storage in shared memory to communicate the struct aggregating +/// arguments from the main thread to the workers in generic mode. +[[clang::loader_uninitialized]] static void *SharedMemAggregatePtr[1]; +#pragma omp allocate(SharedMemAggregatePtr) allocator(omp_pteam_mem_alloc) + +void __kmpc_get_shared_variables_aggregate(void **GlobalArgs) { + *GlobalArgs = SharedMemAggregatePtr[0]; } -void __kmpc_get_shared_variables(void ***GlobalArgs) { - FunctionTracingRAII(); - *GlobalArgs = SharedMemVariableSharingSpacePtr; +void __kmpc_set_shared_variables_aggregate(void *args) { + SharedMemAggregatePtr[0] = args; } } #pragma omp end declare target diff --git a/openmp/libomptarget/utils/generate_microtask_cases.py b/openmp/libomptarget/utils/generate_microtask_cases.py deleted file mode 100755 --- a/openmp/libomptarget/utils/generate_microtask_cases.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 - -import argparse - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('--max_args', type=int, help='Max number of arguments to generate case statements for', required=True) - parser.add_argument('--output', help='Output header file to include', required=True) - args = parser.parse_args() - - output='' - for i in range(args.max_args+1): - output += 'case %d:\n'%(i) - output += '((void (*)(kmp_int32 *, kmp_int32 *\n' - for j in range(i): - output += ', void *' - if (j+1)%4 == 0: - output += '\n' - output += '))fn)(&global_tid, &bound_tid\n' - for j in range(i): - output += ', args[%d]'%(j) - if (j+1)%4 == 0: - output += '\n' - output += ');\n' - output += 'break;\n' - - with open(args.output, 'w') as f: - print(output, file=f) - -if __name__ == "__main__": - main()